OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches

Functions

subroutine zgejsv (joba, jobu, jobv, jobr, jobt, jobp, m, n, a, lda, sva, u, ldu, v, ldv, cwork, lwork, rwork, lrwork, iwork, info)
 ZGEJSV
subroutine zgesdd (jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, iwork, info)
 ZGESDD
subroutine zgesvd (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, info)
  ZGESVD computes the singular value decomposition (SVD) for GE matrices
subroutine zgesvdq (joba, jobp, jobr, jobu, jobv, m, n, a, lda, s, u, ldu, v, ldv, numrank, iwork, liwork, cwork, lcwork, rwork, lrwork, info)
  ZGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method for GE matrices
subroutine zgesvdx (jobu, jobvt, range, m, n, a, lda, vl, vu, il, iu, ns, s, u, ldu, vt, ldvt, work, lwork, rwork, iwork, info)
  ZGESVDX computes the singular value decomposition (SVD) for GE matrices
subroutine zggsvd3 (jobu, jobv, jobq, m, n, p, k, l, a, lda, b, ldb, alpha, beta, u, ldu, v, ldv, q, ldq, work, lwork, rwork, iwork, info)
  ZGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices

Detailed Description

This is the group of complex16 singular value driver functions for GE matrices

Function Documentation

◆ zgejsv()

subroutine zgejsv ( character*1 joba,
character*1 jobu,
character*1 jobv,
character*1 jobr,
character*1 jobt,
character*1 jobp,
integer m,
integer n,
complex*16, dimension( lda, * ) a,
integer lda,
double precision, dimension( n ) sva,
complex*16, dimension( ldu, * ) u,
integer ldu,
complex*16, dimension( ldv, * ) v,
integer ldv,
complex*16, dimension( lwork ) cwork,
integer lwork,
double precision, dimension( lrwork ) rwork,
integer lrwork,
integer, dimension( * ) iwork,
integer info )

ZGEJSV

Download ZGEJSV + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> ZGEJSV computes the singular value decomposition (SVD) of a complex M-by-N
!> matrix [A], where M >= N. The SVD of [A] is written as
!>
!>              [A] = [U] * [SIGMA] * [V]^*,
!>
!> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
!> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and
!> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are
!> the singular values of [A]. The columns of [U] and [V] are the left and
!> the right singular vectors of [A], respectively. The matrices [U] and [V]
!> are computed and stored in the arrays U and V, respectively. The diagonal
!> of [SIGMA] is computed and stored in the array SVA.
!> 

Arguments:

Parameters
[in]JOBA
!>          JOBA is CHARACTER*1
!>         Specifies the level of accuracy:
!>       = 'C': This option works well (high relative accuracy) if A = B * D,
!>              with well-conditioned B and arbitrary diagonal matrix D.
!>              The accuracy cannot be spoiled by COLUMN scaling. The
!>              accuracy of the computed output depends on the condition of
!>              B, and the procedure aims at the best theoretical accuracy.
!>              The relative error max_{i=1:N}|d sigma_i| / sigma_i is
!>              bounded by f(M,N)*epsilon* cond(B), independent of D.
!>              The input matrix is preprocessed with the QRF with column
!>              pivoting. This initial preprocessing and preconditioning by
!>              a rank revealing QR factorization is common for all values of
!>              JOBA. Additional actions are specified as follows:
!>       = 'E': Computation as with 'C' with an additional estimate of the
!>              condition number of B. It provides a realistic error bound.
!>       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
!>              D1, D2, and well-conditioned matrix C, this option gives
!>              higher accuracy than the 'C' option. If the structure of the
!>              input matrix is not known, and relative accuracy is
!>              desirable, then this option is advisable. The input matrix A
!>              is preprocessed with QR factorization with FULL (row and
!>              column) pivoting.
!>       = 'G': Computation as with 'F' with an additional estimate of the
!>              condition number of B, where A=B*D. If A has heavily weighted
!>              rows, then using this condition number gives too pessimistic
!>              error bound.
!>       = 'A': Small singular values are not well determined by the data 
!>              and are considered as noisy; the matrix is treated as
!>              numerically rank deficient. The error in the computed
!>              singular values is bounded by f(m,n)*epsilon*||A||.
!>              The computed SVD A = U * S * V^* restores A up to
!>              f(m,n)*epsilon*||A||.
!>              This gives the procedure the licence to discard (set to zero)
!>              all singular values below N*epsilon*||A||.
!>       = 'R': Similar as in 'A'. Rank revealing property of the initial
!>              QR factorization is used do reveal (using triangular factor)
!>              a gap sigma_{r+1} < epsilon * sigma_r in which case the
!>              numerical RANK is declared to be r. The SVD is computed with
!>              absolute error bounds, but more accurately than with 'A'.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>         Specifies whether to compute the columns of U:
!>       = 'U': N columns of U are returned in the array U.
!>       = 'F': full set of M left sing. vectors is returned in the array U.
!>       = 'W': U may be used as workspace of length M*N. See the description
!>              of U.
!>       = 'N': U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>         Specifies whether to compute the matrix V:
!>       = 'V': N columns of V are returned in the array V; Jacobi rotations
!>              are not explicitly accumulated.
!>       = 'J': N columns of V are returned in the array V, but they are
!>              computed as the product of Jacobi rotations, if JOBT = 'N'.
!>       = 'W': V may be used as workspace of length N*N. See the description
!>              of V.
!>       = 'N': V is not computed.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>         Specifies the RANGE for the singular values. Issues the licence to
!>         set to zero small positive singular values if they are outside
!>         specified range. If A .NE. 0 is scaled so that the largest singular
!>         value of c*A is around SQRT(BIG), BIG=DLAMCH('O'), then JOBR issues
!>         the licence to kill columns of A whose norm in c*A is less than
!>         SQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN,
!>         where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E').
!>       = 'N': Do not kill small columns of c*A. This option assumes that
!>              BLAS and QR factorizations and triangular solvers are
!>              implemented to work in that range. If the condition of A
!>              is greater than BIG, use ZGESVJ.
!>       = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
!>              (roughly, as described above). This option is recommended.
!>                                             ===========================
!>         For computing the singular values in the FULL range [SFMIN,BIG]
!>         use ZGESVJ.
!> 
[in]JOBT
!>          JOBT is CHARACTER*1
!>         If the matrix is square then the procedure may determine to use
!>         transposed A if A^* seems to be better with respect to convergence.
!>         If the matrix is not square, JOBT is ignored. 
!>         The decision is based on two values of entropy over the adjoint
!>         orbit of A^* * A. See the descriptions of WORK(6) and WORK(7).
!>       = 'T': transpose if entropy test indicates possibly faster
!>         convergence of Jacobi process if A^* is taken as input. If A is
!>         replaced with A^*, then the row pivoting is included automatically.
!>       = 'N': do not speculate.
!>         The option 'T' can be used to compute only the singular values, or
!>         the full SVD (U, SIGMA and V). For only one set of singular vectors
!>         (U or V), the caller should provide both U and V, as one of the
!>         matrices is used as workspace if the matrix A is transposed.
!>         The implementer can easily remove this constraint and make the
!>         code more complicated. See the descriptions of U and V.
!>         In general, this option is considered experimental, and 'N'; should
!>         be preferred. This is subject to changes in the future.
!> 
[in]JOBP
!>          JOBP is CHARACTER*1
!>         Issues the licence to introduce structured perturbations to drown
!>         denormalized numbers. This licence should be active if the
!>         denormals are poorly implemented, causing slow computation,
!>         especially in cases of fast convergence (!). For details see [1,2].
!>         For the sake of simplicity, this perturbations are included only
!>         when the full SVD or only the singular values are requested. The
!>         implementer/user can easily add the perturbation for the cases of
!>         computing one set of singular vectors.
!>       = 'P': introduce perturbation
!>       = 'N': do not perturb
!> 
[in]M
!>          M is INTEGER
!>         The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>         The number of columns of the input matrix A. M >= N >= 0.
!> 
[in,out]A
!>          A is COMPLEX*16 array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]SVA
!>          SVA is DOUBLE PRECISION array, dimension (N)
!>          On exit,
!>          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
!>            computation SVA contains Euclidean column norms of the
!>            iterated matrices in the array A.
!>          - For WORK(1) .NE. WORK(2): The singular values of A are
!>            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
!>            sigma_max(A) overflows or if small singular values have been
!>            saved from underflow by scaling the input matrix A.
!>          - If JOBR='R' then some of the singular values may be returned
!>            as exact zeros obtained by  because they are
!>            below the numerical rank threshold or are denormalized numbers.
!> 
[out]U
!>          U is COMPLEX*16 array, dimension ( LDU, N )
!>          If JOBU = 'U', then U contains on exit the M-by-N matrix of
!>                         the left singular vectors.
!>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
!>                         the left singular vectors, including an ONB
!>                         of the orthogonal complement of the Range(A).
!>          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N),
!>                         then U is used as workspace if the procedure
!>                         replaces A with A^*. In that case, [V] is computed
!>                         in U as left singular vectors of A^* and then
!>                         copied back to the V array. This 'W' option is just
!>                         a reminder to the caller that in this case U is
!>                         reserved as workspace of length N*N.
!>          If JOBU = 'N'  U is not referenced, unless JOBT='T'.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U,  LDU >= 1.
!>          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
!> 
[out]V
!>          V is COMPLEX*16 array, dimension ( LDV, N )
!>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
!>                         the right singular vectors;
!>          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N),
!>                         then V is used as workspace if the pprocedure
!>                         replaces A with A^*. In that case, [U] is computed
!>                         in V as right singular vectors of A^* and then
!>                         copied back to the U array. This 'W' option is just
!>                         a reminder to the caller that in this case V is
!>                         reserved as workspace of length N*N.
!>          If JOBV = 'N'  V is not referenced, unless JOBT='T'.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V,  LDV >= 1.
!>          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
!> 
[out]CWORK
!>          CWORK is COMPLEX*16 array, dimension (MAX(2,LWORK))
!>          If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit CWORK(1) contains the required length of
!>          CWORK for the job parameters used in the call.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          Length of CWORK to confirm proper allocation of workspace.
!>          LWORK depends on the job:
!>
!>          1. If only SIGMA is needed ( JOBU = 'N', JOBV = 'N' ) and
!>            1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'):
!>               LWORK >= 2*N+1. This is the minimal requirement.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= N + (N+1)*NB. Here NB is the optimal
!>               block size for ZGEQP3 and ZGEQRF.
!>               In general, optimal LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ)).
!>            1.2. .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G'). In this case, LWORK the minimal
!>               requirement is LWORK >= N*N + 2*N.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ),
!>                            N*N+LWORK(ZPOCON)).
!>          2. If SIGMA and the right singular vectors are needed (JOBV = 'V'),
!>             (JOBU = 'N')
!>            2.1   .. no scaled condition estimate requested (JOBE = 'N'):    
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance, 
!>               LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ,
!>               ZUNMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(ZGESVJ),
!>                       N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)).
!>            2.2 .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G').
!>            -> the minimal requirement is LWORK >= 3*N.      
!>            -> For optimal performance, 
!>               LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ,
!>               ZUNMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3), LWORK(ZPOCON), N+LWORK(ZGESVJ),
!>                       N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)).   
!>          3. If SIGMA and the left singular vectors are needed
!>            3.1  .. no scaled condition estimate requested (JOBE = 'N'):
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)). 
!>            3.2  .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G').
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZPOCON),
!>                        2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)).
!>          4. If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and 
!>            4.1. if JOBV = 'V'  
!>               the minimal requirement is LWORK >= 5*N+2*N*N. 
!>            4.2. if JOBV = 'J' the minimal requirement is 
!>               LWORK >= 4*N+N*N.
!>            In both cases, the allocated CWORK can accommodate blocked runs
!>            of ZGEQP3, ZGEQRF, ZGELQF, SUNMQR, ZUNMLQ.
!>
!>          If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the
!>          minimal length of CWORK for the job parameters used in the call.
!> 
[out]RWORK
!>          RWORK is DOUBLE PRECISION array, dimension (MAX(7,LWORK))
!>          On exit,
!>          RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1)
!>                    such that SCALE*SVA(1:N) are the computed singular values
!>                    of A. (See the description of SVA().)
!>          RWORK(2) = See the description of RWORK(1).
!>          RWORK(3) = SCONDA is an estimate for the condition number of
!>                    column equilibrated A. (If JOBA = 'E' or 'G')
!>                    SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
!>                    It is computed using ZPOCON. It holds
!>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
!>                    where R is the triangular factor from the QRF of A.
!>                    However, if R is truncated and the numerical rank is
!>                    determined to be strictly smaller than N, SCONDA is
!>                    returned as -1, thus indicating that the smallest
!>                    singular values might be lost.
!>
!>          If full SVD is needed, the following two condition numbers are
!>          useful for the analysis of the algorithm. They are provided for
!>          a developer/implementer who is familiar with the details of
!>          the method.
!>
!>          RWORK(4) = an estimate of the scaled condition number of the
!>                    triangular factor in the first QR factorization.
!>          RWORK(5) = an estimate of the scaled condition number of the
!>                    triangular factor in the second QR factorization.
!>          The following two parameters are computed if JOBT = 'T'.
!>          They are provided for a developer/implementer who is familiar
!>          with the details of the method.
!>          RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy
!>                    of diag(A^* * A) / Trace(A^* * A) taken as point in the
!>                    probability simplex.
!>          RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).)
!>          If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit RWORK(1) contains the required length of
!>          RWORK for the job parameters used in the call.
!> 
[in]LRWORK
!>          LRWORK is INTEGER
!>          Length of RWORK to confirm proper allocation of workspace.
!>          LRWORK depends on the job:
!>
!>       1. If only the singular values are requested i.e. if
!>          LSAME(JOBU,'N') .AND. LSAME(JOBV,'N')
!>          then:
!>          1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>               then: LRWORK = max( 7, 2 * M ).
!>          1.2. Otherwise, LRWORK  = max( 7,  N ).
!>       2. If singular values with the right singular vectors are requested
!>          i.e. if
!>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND.
!>          .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F'))
!>          then:
!>          2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          2.2. Otherwise, LRWORK  = max( 7,  N ).
!>       3. If singular values with the left singular vectors are requested, i.e. if
!>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
!>          .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
!>          then:
!>          3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          3.2. Otherwise, LRWORK  = max( 7,  N ).
!>       4. If singular values with both the left and the right singular vectors
!>          are requested, i.e. if
!>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
!>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
!>          then:
!>          4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          4.2. Otherwise, LRWORK  = max( 7, N ).
!>
!>          If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and 
!>          the length of RWORK is returned in RWORK(1). 
!> 
[out]IWORK
!>          IWORK is INTEGER array, of dimension at least 4, that further depends 
!>          on the job:
!>
!>          1. If only the singular values are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N.
!>          2. If the singular values and the right singular vectors are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>          3. If the singular values and the left singular vectors are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>          4. If the singular values with both the left and the right singular vectors
!>             are requested, then:      
!>             4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows:
!>                  If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>                  then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>             4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows:
!>                  If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>                  then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N.
!>        
!>          On exit,
!>          IWORK(1) = the numerical rank determined after the initial
!>                     QR factorization with pivoting. See the descriptions
!>                     of JOBA and JOBR.
!>          IWORK(2) = the number of the computed nonzero singular values
!>          IWORK(3) = if nonzero, a warning message:
!>                     If IWORK(3) = 1 then some of the column norms of A
!>                     were denormalized floats. The requested high accuracy
!>                     is not warranted by the data.
!>          IWORK(4) = 1 or -1. If IWORK(4) = 1, then the procedure used A^* to
!>                     do the job as specified by the JOB parameters.
!>          If the call to ZGEJSV is a workspace query (indicated by LWORK = -1 or
!>          LRWORK = -1), then on exit IWORK(1) contains the required length of 
!>          IWORK for the job parameters used in the call.
!> 
[out]INFO
!>          INFO is INTEGER
!>           < 0:  if INFO = -i, then the i-th argument had an illegal value.
!>           = 0:  successful exit;
!>           > 0:  ZGEJSV  did not converge in the maximal allowed number
!>                 of sweeps. The computed values may be inaccurate.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  ZGEJSV implements a preconditioned Jacobi SVD algorithm. It uses ZGEQP3,
!>  ZGEQRF, and ZGELQF as preprocessors and preconditioners. Optionally, an
!>  additional row pivoting can be used as a preprocessor, which in some
!>  cases results in much higher accuracy. An example is matrix A with the
!>  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
!>  diagonal matrices and C is well-conditioned matrix. In that case, complete
!>  pivoting in the first QR factorizations provides accuracy dependent on the
!>  condition number of C, and independent of D1, D2. Such higher accuracy is
!>  not completely understood theoretically, but it works well in practice.
!>  Further, if A can be written as A = B*D, with well-conditioned B and some
!>  diagonal D, then the high accuracy is guaranteed, both theoretically and
!>  in software, independent of D. For more details see [1], [2].
!>     The computational range for the singular values can be the full range
!>  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
!>  & LAPACK routines called by ZGEJSV are implemented to work in that range.
!>  If that is not the case, then the restriction for safe computation with
!>  the singular values in the range of normalized IEEE numbers is that the
!>  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
!>  overflow. This code (ZGEJSV) is best used in this restricted range,
!>  meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are
!>  returned as zeros. See JOBR for details on this.
!>     Further, this implementation is somewhat slower than the one described
!>  in [1,2] due to replacement of some non-LAPACK components, and because
!>  the choice of some tuning parameters in the iterative part (ZGESVJ) is
!>  left to the implementer on a particular machine.
!>     The rank revealing QR factorization (in this code: ZGEQP3) should be
!>  implemented as in [3]. We have a new version of ZGEQP3 under development
!>  that is more robust than the current one in LAPACK, with a cleaner cut in
!>  rank deficient cases. It will be available in the SIGMA library [4].
!>  If M is much larger than N, it is obvious that the initial QRF with
!>  column pivoting can be preprocessed by the QRF without pivoting. That
!>  well known trick is not used in ZGEJSV because in some cases heavy row
!>  weighting can be treated with complete pivoting. The overhead in cases
!>  M much larger than N is then only due to pivoting, but the benefits in
!>  terms of accuracy have prevailed. The implementer/user can incorporate
!>  this extra QRF step easily. The implementer can also improve data movement
!>  (matrix transpose, matrix copy, matrix transposed copy) - this
!>  implementation of ZGEJSV uses only the simplest, naive data movement.
!> 
Contributor:
Zlatko Drmac, Department of Mathematics, Faculty of Science, University of Zagreb (Zagreb, Croatia); drmac.nosp@m.@mat.nosp@m.h.hr
References:
!>
!> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
!>     LAPACK Working note 169.
!> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
!>     LAPACK Working note 170.
!> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
!>     factorization software - a case study.
!>     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
!>     LAPACK Working note 176.
!> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
!>     QSVD, (H,K)-SVD computations.
!>     Department of Mathematics, University of Zagreb, 2008, 2016.
!> 
Bugs, examples and comments:
Please report all bugs and send interesting examples and/or comments to drmac.nosp@m.@mat.nosp@m.h.hr. Thank you.

Definition at line 566 of file zgejsv.f.

569*
570* -- LAPACK computational routine --
571* -- LAPACK is a software package provided by Univ. of Tennessee, --
572* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
573*
574* .. Scalar Arguments ..
575 IMPLICIT NONE
576 INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N
577* ..
578* .. Array Arguments ..
579 COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ),
580 $ CWORK( LWORK )
581 DOUBLE PRECISION SVA( N ), RWORK( LRWORK )
582 INTEGER IWORK( * )
583 CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
584* ..
585*
586* ===========================================================================
587*
588* .. Local Parameters ..
589 DOUBLE PRECISION ZERO, ONE
590 parameter( zero = 0.0d0, one = 1.0d0 )
591 COMPLEX*16 CZERO, CONE
592 parameter( czero = ( 0.0d0, 0.0d0 ), cone = ( 1.0d0, 0.0d0 ) )
593* ..
594* .. Local Scalars ..
595 COMPLEX*16 CTEMP
596 DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1,
597 $ COND_OK, CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN,
598 $ MAXPRJ, SCALEM, SCONDA, SFMIN, SMALL, TEMP1,
599 $ USCAL1, USCAL2, XSC
600 INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING
601 LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY,
602 $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL,
603 $ ROWPIV, RSVEC, TRANSP
604*
605 INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK
606 INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM,
607 $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF
608 INTEGER LWRK_ZGELQF, LWRK_ZGEQP3, LWRK_ZGEQP3N, LWRK_ZGEQRF,
609 $ LWRK_ZGESVJ, LWRK_ZGESVJV, LWRK_ZGESVJU, LWRK_ZUNMLQ,
610 $ LWRK_ZUNMQR, LWRK_ZUNMQRM
611* ..
612* .. Local Arrays
613 COMPLEX*16 CDUMMY(1)
614 DOUBLE PRECISION RDUMMY(1)
615*
616* .. Intrinsic Functions ..
617 INTRINSIC abs, dcmplx, conjg, dlog, max, min, dble, nint, sqrt
618* ..
619* .. External Functions ..
620 DOUBLE PRECISION DLAMCH, DZNRM2
621 INTEGER IDAMAX, IZAMAX
622 LOGICAL LSAME
623 EXTERNAL idamax, izamax, lsame, dlamch, dznrm2
624* ..
625* .. External Subroutines ..
629 $ xerbla
630*
631 EXTERNAL zgesvj
632* ..
633*
634* Test the input arguments
635*
636 lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
637 jracc = lsame( jobv, 'J' )
638 rsvec = lsame( jobv, 'V' ) .OR. jracc
639 rowpiv = lsame( joba, 'F' ) .OR. lsame( joba, 'G' )
640 l2rank = lsame( joba, 'R' )
641 l2aber = lsame( joba, 'A' )
642 errest = lsame( joba, 'E' ) .OR. lsame( joba, 'G' )
643 l2tran = lsame( jobt, 'T' ) .AND. ( m .EQ. n )
644 l2kill = lsame( jobr, 'R' )
645 defr = lsame( jobr, 'N' )
646 l2pert = lsame( jobp, 'P' )
647*
648 lquery = ( lwork .EQ. -1 ) .OR. ( lrwork .EQ. -1 )
649*
650 IF ( .NOT.(rowpiv .OR. l2rank .OR. l2aber .OR.
651 $ errest .OR. lsame( joba, 'C' ) )) THEN
652 info = - 1
653 ELSE IF ( .NOT.( lsvec .OR. lsame( jobu, 'N' ) .OR.
654 $ ( lsame( jobu, 'W' ) .AND. rsvec .AND. l2tran ) ) ) THEN
655 info = - 2
656 ELSE IF ( .NOT.( rsvec .OR. lsame( jobv, 'N' ) .OR.
657 $ ( lsame( jobv, 'W' ) .AND. lsvec .AND. l2tran ) ) ) THEN
658 info = - 3
659 ELSE IF ( .NOT. ( l2kill .OR. defr ) ) THEN
660 info = - 4
661 ELSE IF ( .NOT. ( lsame(jobt,'T') .OR. lsame(jobt,'N') ) ) THEN
662 info = - 5
663 ELSE IF ( .NOT. ( l2pert .OR. lsame( jobp, 'N' ) ) ) THEN
664 info = - 6
665 ELSE IF ( m .LT. 0 ) THEN
666 info = - 7
667 ELSE IF ( ( n .LT. 0 ) .OR. ( n .GT. m ) ) THEN
668 info = - 8
669 ELSE IF ( lda .LT. m ) THEN
670 info = - 10
671 ELSE IF ( lsvec .AND. ( ldu .LT. m ) ) THEN
672 info = - 13
673 ELSE IF ( rsvec .AND. ( ldv .LT. n ) ) THEN
674 info = - 15
675 ELSE
676* #:)
677 info = 0
678 END IF
679*
680 IF ( info .EQ. 0 ) THEN
681* .. compute the minimal and the optimal workspace lengths
682* [[The expressions for computing the minimal and the optimal
683* values of LCWORK, LRWORK are written with a lot of redundancy and
684* can be simplified. However, this verbose form is useful for
685* maintenance and modifications of the code.]]
686*
687* .. minimal workspace length for ZGEQP3 of an M x N matrix,
688* ZGEQRF of an N x N matrix, ZGELQF of an N x N matrix,
689* ZUNMLQ for computing N x N matrix, ZUNMQR for computing N x N
690* matrix, ZUNMQR for computing M x N matrix, respectively.
691 lwqp3 = n+1
692 lwqrf = max( 1, n )
693 lwlqf = max( 1, n )
694 lwunmlq = max( 1, n )
695 lwunmqr = max( 1, n )
696 lwunmqrm = max( 1, m )
697* .. minimal workspace length for ZPOCON of an N x N matrix
698 lwcon = 2 * n
699* .. minimal workspace length for ZGESVJ of an N x N matrix,
700* without and with explicit accumulation of Jacobi rotations
701 lwsvdj = max( 2 * n, 1 )
702 lwsvdjv = max( 2 * n, 1 )
703* .. minimal REAL workspace length for ZGEQP3, ZPOCON, ZGESVJ
704 lrwqp3 = 2 * n
705 lrwcon = n
706 lrwsvdj = n
707 IF ( lquery ) THEN
708 CALL zgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,
709 $ rdummy, ierr )
710 lwrk_zgeqp3 = dble( cdummy(1) )
711 CALL zgeqrf( n, n, a, lda, cdummy, cdummy,-1, ierr )
712 lwrk_zgeqrf = dble( cdummy(1) )
713 CALL zgelqf( n, n, a, lda, cdummy, cdummy,-1, ierr )
714 lwrk_zgelqf = dble( cdummy(1) )
715 END IF
716 minwrk = 2
717 optwrk = 2
718 miniwrk = n
719 IF ( .NOT. (lsvec .OR. rsvec ) ) THEN
720* .. minimal and optimal sizes of the complex workspace if
721* only the singular values are requested
722 IF ( errest ) THEN
723 minwrk = max( n+lwqp3, n**2+lwcon, n+lwqrf, lwsvdj )
724 ELSE
725 minwrk = max( n+lwqp3, n+lwqrf, lwsvdj )
726 END IF
727 IF ( lquery ) THEN
728 CALL zgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n, v,
729 $ ldv, cdummy, -1, rdummy, -1, ierr )
730 lwrk_zgesvj = dble( cdummy(1) )
731 IF ( errest ) THEN
732 optwrk = max( n+lwrk_zgeqp3, n**2+lwcon,
733 $ n+lwrk_zgeqrf, lwrk_zgesvj )
734 ELSE
735 optwrk = max( n+lwrk_zgeqp3, n+lwrk_zgeqrf,
736 $ lwrk_zgesvj )
737 END IF
738 END IF
739 IF ( l2tran .OR. rowpiv ) THEN
740 IF ( errest ) THEN
741 minrwrk = max( 7, 2*m, lrwqp3, lrwcon, lrwsvdj )
742 ELSE
743 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
744 END IF
745 ELSE
746 IF ( errest ) THEN
747 minrwrk = max( 7, lrwqp3, lrwcon, lrwsvdj )
748 ELSE
749 minrwrk = max( 7, lrwqp3, lrwsvdj )
750 END IF
751 END IF
752 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
753 ELSE IF ( rsvec .AND. (.NOT.lsvec) ) THEN
754* .. minimal and optimal sizes of the complex workspace if the
755* singular values and the right singular vectors are requested
756 IF ( errest ) THEN
757 minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,
758 $ 2*n+lwqrf, n+lwsvdj, n+lwunmlq )
759 ELSE
760 minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2*n+lwqrf,
761 $ n+lwsvdj, n+lwunmlq )
762 END IF
763 IF ( lquery ) THEN
764 CALL zgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
765 $ lda, cdummy, -1, rdummy, -1, ierr )
766 lwrk_zgesvj = dble( cdummy(1) )
767 CALL zunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
768 $ v, ldv, cdummy, -1, ierr )
769 lwrk_zunmlq = dble( cdummy(1) )
770 IF ( errest ) THEN
771 optwrk = max( n+lwrk_zgeqp3, lwcon, lwrk_zgesvj,
772 $ n+lwrk_zgelqf, 2*n+lwrk_zgeqrf,
773 $ n+lwrk_zgesvj, n+lwrk_zunmlq )
774 ELSE
775 optwrk = max( n+lwrk_zgeqp3, lwrk_zgesvj,n+lwrk_zgelqf,
776 $ 2*n+lwrk_zgeqrf, n+lwrk_zgesvj,
777 $ n+lwrk_zunmlq )
778 END IF
779 END IF
780 IF ( l2tran .OR. rowpiv ) THEN
781 IF ( errest ) THEN
782 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
783 ELSE
784 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
785 END IF
786 ELSE
787 IF ( errest ) THEN
788 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
789 ELSE
790 minrwrk = max( 7, lrwqp3, lrwsvdj )
791 END IF
792 END IF
793 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
794 ELSE IF ( lsvec .AND. (.NOT.rsvec) ) THEN
795* .. minimal and optimal sizes of the complex workspace if the
796* singular values and the left singular vectors are requested
797 IF ( errest ) THEN
798 minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm )
799 ELSE
800 minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm )
801 END IF
802 IF ( lquery ) THEN
803 CALL zgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
804 $ lda, cdummy, -1, rdummy, -1, ierr )
805 lwrk_zgesvj = dble( cdummy(1) )
806 CALL zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
807 $ ldu, cdummy, -1, ierr )
808 lwrk_zunmqrm = dble( cdummy(1) )
809 IF ( errest ) THEN
810 optwrk = n + max( lwrk_zgeqp3, lwcon, n+lwrk_zgeqrf,
811 $ lwrk_zgesvj, lwrk_zunmqrm )
812 ELSE
813 optwrk = n + max( lwrk_zgeqp3, n+lwrk_zgeqrf,
814 $ lwrk_zgesvj, lwrk_zunmqrm )
815 END IF
816 END IF
817 IF ( l2tran .OR. rowpiv ) THEN
818 IF ( errest ) THEN
819 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
820 ELSE
821 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
822 END IF
823 ELSE
824 IF ( errest ) THEN
825 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
826 ELSE
827 minrwrk = max( 7, lrwqp3, lrwsvdj )
828 END IF
829 END IF
830 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
831 ELSE
832* .. minimal and optimal sizes of the complex workspace if the
833* full SVD is requested
834 IF ( .NOT. jracc ) THEN
835 IF ( errest ) THEN
836 minwrk = max( n+lwqp3, n+lwcon, 2*n+n**2+lwcon,
837 $ 2*n+lwqrf, 2*n+lwqp3,
838 $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
839 $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
840 $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
841 $ n+n**2+lwsvdj, n+lwunmqrm )
842 ELSE
843 minwrk = max( n+lwqp3, 2*n+n**2+lwcon,
844 $ 2*n+lwqrf, 2*n+lwqp3,
845 $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
846 $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
847 $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
848 $ n+n**2+lwsvdj, n+lwunmqrm )
849 END IF
850 miniwrk = miniwrk + n
851 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
852 ELSE
853 IF ( errest ) THEN
854 minwrk = max( n+lwqp3, n+lwcon, 2*n+lwqrf,
855 $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
856 $ n+lwunmqrm )
857 ELSE
858 minwrk = max( n+lwqp3, 2*n+lwqrf,
859 $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
860 $ n+lwunmqrm )
861 END IF
862 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
863 END IF
864 IF ( lquery ) THEN
865 CALL zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
866 $ ldu, cdummy, -1, ierr )
867 lwrk_zunmqrm = dble( cdummy(1) )
868 CALL zunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,
869 $ ldu, cdummy, -1, ierr )
870 lwrk_zunmqr = dble( cdummy(1) )
871 IF ( .NOT. jracc ) THEN
872 CALL zgeqp3( n,n, a, lda, iwork, cdummy,cdummy, -1,
873 $ rdummy, ierr )
874 lwrk_zgeqp3n = dble( cdummy(1) )
875 CALL zgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,
876 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
877 lwrk_zgesvj = dble( cdummy(1) )
878 CALL zgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,
879 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
880 lwrk_zgesvju = dble( cdummy(1) )
881 CALL zgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
882 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
883 lwrk_zgesvjv = dble( cdummy(1) )
884 CALL zunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
885 $ v, ldv, cdummy, -1, ierr )
886 lwrk_zunmlq = dble( cdummy(1) )
887 IF ( errest ) THEN
888 optwrk = max( n+lwrk_zgeqp3, n+lwcon,
889 $ 2*n+n**2+lwcon, 2*n+lwrk_zgeqrf,
890 $ 2*n+lwrk_zgeqp3n,
891 $ 2*n+n**2+n+lwrk_zgelqf,
892 $ 2*n+n**2+n+n**2+lwcon,
893 $ 2*n+n**2+n+lwrk_zgesvj,
894 $ 2*n+n**2+n+lwrk_zgesvjv,
895 $ 2*n+n**2+n+lwrk_zunmqr,
896 $ 2*n+n**2+n+lwrk_zunmlq,
897 $ n+n**2+lwrk_zgesvju,
898 $ n+lwrk_zunmqrm )
899 ELSE
900 optwrk = max( n+lwrk_zgeqp3,
901 $ 2*n+n**2+lwcon, 2*n+lwrk_zgeqrf,
902 $ 2*n+lwrk_zgeqp3n,
903 $ 2*n+n**2+n+lwrk_zgelqf,
904 $ 2*n+n**2+n+n**2+lwcon,
905 $ 2*n+n**2+n+lwrk_zgesvj,
906 $ 2*n+n**2+n+lwrk_zgesvjv,
907 $ 2*n+n**2+n+lwrk_zunmqr,
908 $ 2*n+n**2+n+lwrk_zunmlq,
909 $ n+n**2+lwrk_zgesvju,
910 $ n+lwrk_zunmqrm )
911 END IF
912 ELSE
913 CALL zgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
914 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
915 lwrk_zgesvjv = dble( cdummy(1) )
916 CALL zunmqr( 'L', 'N', n, n, n, cdummy, n, cdummy,
917 $ v, ldv, cdummy, -1, ierr )
918 lwrk_zunmqr = dble( cdummy(1) )
919 CALL zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
920 $ ldu, cdummy, -1, ierr )
921 lwrk_zunmqrm = dble( cdummy(1) )
922 IF ( errest ) THEN
923 optwrk = max( n+lwrk_zgeqp3, n+lwcon,
924 $ 2*n+lwrk_zgeqrf, 2*n+n**2,
925 $ 2*n+n**2+lwrk_zgesvjv,
926 $ 2*n+n**2+n+lwrk_zunmqr,n+lwrk_zunmqrm )
927 ELSE
928 optwrk = max( n+lwrk_zgeqp3, 2*n+lwrk_zgeqrf,
929 $ 2*n+n**2, 2*n+n**2+lwrk_zgesvjv,
930 $ 2*n+n**2+n+lwrk_zunmqr,
931 $ n+lwrk_zunmqrm )
932 END IF
933 END IF
934 END IF
935 IF ( l2tran .OR. rowpiv ) THEN
936 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
937 ELSE
938 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
939 END IF
940 END IF
941 minwrk = max( 2, minwrk )
942 optwrk = max( minwrk, optwrk )
943 IF ( lwork .LT. minwrk .AND. (.NOT.lquery) ) info = - 17
944 IF ( lrwork .LT. minrwrk .AND. (.NOT.lquery) ) info = - 19
945 END IF
946*
947 IF ( info .NE. 0 ) THEN
948* #:(
949 CALL xerbla( 'ZGEJSV', - info )
950 RETURN
951 ELSE IF ( lquery ) THEN
952 cwork(1) = optwrk
953 cwork(2) = minwrk
954 rwork(1) = minrwrk
955 iwork(1) = max( 4, miniwrk )
956 RETURN
957 END IF
958*
959* Quick return for void matrix (Y3K safe)
960* #:)
961 IF ( ( m .EQ. 0 ) .OR. ( n .EQ. 0 ) ) THEN
962 iwork(1:4) = 0
963 rwork(1:7) = 0
964 RETURN
965 ENDIF
966*
967* Determine whether the matrix U should be M x N or M x M
968*
969 IF ( lsvec ) THEN
970 n1 = n
971 IF ( lsame( jobu, 'F' ) ) n1 = m
972 END IF
973*
974* Set numerical parameters
975*
976*! NOTE: Make sure DLAMCH() does not fail on the target architecture.
977*
978 epsln = dlamch('Epsilon')
979 sfmin = dlamch('SafeMinimum')
980 small = sfmin / epsln
981 big = dlamch('O')
982* BIG = ONE / SFMIN
983*
984* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
985*
986*(!) If necessary, scale SVA() to protect the largest norm from
987* overflow. It is possible that this scaling pushes the smallest
988* column norm left from the underflow threshold (extreme case).
989*
990 scalem = one / sqrt(dble(m)*dble(n))
991 noscal = .true.
992 goscal = .true.
993 DO 1874 p = 1, n
994 aapp = zero
995 aaqq = one
996 CALL zlassq( m, a(1,p), 1, aapp, aaqq )
997 IF ( aapp .GT. big ) THEN
998 info = - 9
999 CALL xerbla( 'ZGEJSV', -info )
1000 RETURN
1001 END IF
1002 aaqq = sqrt(aaqq)
1003 IF ( ( aapp .LT. (big / aaqq) ) .AND. noscal ) THEN
1004 sva(p) = aapp * aaqq
1005 ELSE
1006 noscal = .false.
1007 sva(p) = aapp * ( aaqq * scalem )
1008 IF ( goscal ) THEN
1009 goscal = .false.
1010 CALL dscal( p-1, scalem, sva, 1 )
1011 END IF
1012 END IF
1013 1874 CONTINUE
1014*
1015 IF ( noscal ) scalem = one
1016*
1017 aapp = zero
1018 aaqq = big
1019 DO 4781 p = 1, n
1020 aapp = max( aapp, sva(p) )
1021 IF ( sva(p) .NE. zero ) aaqq = min( aaqq, sva(p) )
1022 4781 CONTINUE
1023*
1024* Quick return for zero M x N matrix
1025* #:)
1026 IF ( aapp .EQ. zero ) THEN
1027 IF ( lsvec ) CALL zlaset( 'G', m, n1, czero, cone, u, ldu )
1028 IF ( rsvec ) CALL zlaset( 'G', n, n, czero, cone, v, ldv )
1029 rwork(1) = one
1030 rwork(2) = one
1031 IF ( errest ) rwork(3) = one
1032 IF ( lsvec .AND. rsvec ) THEN
1033 rwork(4) = one
1034 rwork(5) = one
1035 END IF
1036 IF ( l2tran ) THEN
1037 rwork(6) = zero
1038 rwork(7) = zero
1039 END IF
1040 iwork(1) = 0
1041 iwork(2) = 0
1042 iwork(3) = 0
1043 iwork(4) = -1
1044 RETURN
1045 END IF
1046*
1047* Issue warning if denormalized column norms detected. Override the
1048* high relative accuracy request. Issue licence to kill nonzero columns
1049* (set them to zero) whose norm is less than sigma_max / BIG (roughly).
1050* #:(
1051 warning = 0
1052 IF ( aaqq .LE. sfmin ) THEN
1053 l2rank = .true.
1054 l2kill = .true.
1055 warning = 1
1056 END IF
1057*
1058* Quick return for one-column matrix
1059* #:)
1060 IF ( n .EQ. 1 ) THEN
1061*
1062 IF ( lsvec ) THEN
1063 CALL zlascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr )
1064 CALL zlacpy( 'A', m, 1, a, lda, u, ldu )
1065* computing all M left singular vectors of the M x 1 matrix
1066 IF ( n1 .NE. n ) THEN
1067 CALL zgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr )
1068 CALL zungqr( m,n1,1, u,ldu,cwork,cwork(n+1),lwork-n,ierr )
1069 CALL zcopy( m, a(1,1), 1, u(1,1), 1 )
1070 END IF
1071 END IF
1072 IF ( rsvec ) THEN
1073 v(1,1) = cone
1074 END IF
1075 IF ( sva(1) .LT. (big*scalem) ) THEN
1076 sva(1) = sva(1) / scalem
1077 scalem = one
1078 END IF
1079 rwork(1) = one / scalem
1080 rwork(2) = one
1081 IF ( sva(1) .NE. zero ) THEN
1082 iwork(1) = 1
1083 IF ( ( sva(1) / scalem) .GE. sfmin ) THEN
1084 iwork(2) = 1
1085 ELSE
1086 iwork(2) = 0
1087 END IF
1088 ELSE
1089 iwork(1) = 0
1090 iwork(2) = 0
1091 END IF
1092 iwork(3) = 0
1093 iwork(4) = -1
1094 IF ( errest ) rwork(3) = one
1095 IF ( lsvec .AND. rsvec ) THEN
1096 rwork(4) = one
1097 rwork(5) = one
1098 END IF
1099 IF ( l2tran ) THEN
1100 rwork(6) = zero
1101 rwork(7) = zero
1102 END IF
1103 RETURN
1104*
1105 END IF
1106*
1107 transp = .false.
1108*
1109 aatmax = -one
1110 aatmin = big
1111 IF ( rowpiv .OR. l2tran ) THEN
1112*
1113* Compute the row norms, needed to determine row pivoting sequence
1114* (in the case of heavily row weighted A, row pivoting is strongly
1115* advised) and to collect information needed to compare the
1116* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.).
1117*
1118 IF ( l2tran ) THEN
1119 DO 1950 p = 1, m
1120 xsc = zero
1121 temp1 = one
1122 CALL zlassq( n, a(p,1), lda, xsc, temp1 )
1123* ZLASSQ gets both the ell_2 and the ell_infinity norm
1124* in one pass through the vector
1125 rwork(m+p) = xsc * scalem
1126 rwork(p) = xsc * (scalem*sqrt(temp1))
1127 aatmax = max( aatmax, rwork(p) )
1128 IF (rwork(p) .NE. zero)
1129 $ aatmin = min(aatmin,rwork(p))
1130 1950 CONTINUE
1131 ELSE
1132 DO 1904 p = 1, m
1133 rwork(m+p) = scalem*abs( a(p,izamax(n,a(p,1),lda)) )
1134 aatmax = max( aatmax, rwork(m+p) )
1135 aatmin = min( aatmin, rwork(m+p) )
1136 1904 CONTINUE
1137 END IF
1138*
1139 END IF
1140*
1141* For square matrix A try to determine whether A^* would be better
1142* input for the preconditioned Jacobi SVD, with faster convergence.
1143* The decision is based on an O(N) function of the vector of column
1144* and row norms of A, based on the Shannon entropy. This should give
1145* the right choice in most cases when the difference actually matters.
1146* It may fail and pick the slower converging side.
1147*
1148 entra = zero
1149 entrat = zero
1150 IF ( l2tran ) THEN
1151*
1152 xsc = zero
1153 temp1 = one
1154 CALL dlassq( n, sva, 1, xsc, temp1 )
1155 temp1 = one / temp1
1156*
1157 entra = zero
1158 DO 1113 p = 1, n
1159 big1 = ( ( sva(p) / xsc )**2 ) * temp1
1160 IF ( big1 .NE. zero ) entra = entra + big1 * dlog(big1)
1161 1113 CONTINUE
1162 entra = - entra / dlog(dble(n))
1163*
1164* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex.
1165* It is derived from the diagonal of A^* * A. Do the same with the
1166* diagonal of A * A^*, compute the entropy of the corresponding
1167* probability distribution. Note that A * A^* and A^* * A have the
1168* same trace.
1169*
1170 entrat = zero
1171 DO 1114 p = 1, m
1172 big1 = ( ( rwork(p) / xsc )**2 ) * temp1
1173 IF ( big1 .NE. zero ) entrat = entrat + big1 * dlog(big1)
1174 1114 CONTINUE
1175 entrat = - entrat / dlog(dble(m))
1176*
1177* Analyze the entropies and decide A or A^*. Smaller entropy
1178* usually means better input for the algorithm.
1179*
1180 transp = ( entrat .LT. entra )
1181*
1182* If A^* is better than A, take the adjoint of A. This is allowed
1183* only for square matrices, M=N.
1184 IF ( transp ) THEN
1185* In an optimal implementation, this trivial transpose
1186* should be replaced with faster transpose.
1187 DO 1115 p = 1, n - 1
1188 a(p,p) = conjg(a(p,p))
1189 DO 1116 q = p + 1, n
1190 ctemp = conjg(a(q,p))
1191 a(q,p) = conjg(a(p,q))
1192 a(p,q) = ctemp
1193 1116 CONTINUE
1194 1115 CONTINUE
1195 a(n,n) = conjg(a(n,n))
1196 DO 1117 p = 1, n
1197 rwork(m+p) = sva(p)
1198 sva(p) = rwork(p)
1199* previously computed row 2-norms are now column 2-norms
1200* of the transposed matrix
1201 1117 CONTINUE
1202 temp1 = aapp
1203 aapp = aatmax
1204 aatmax = temp1
1205 temp1 = aaqq
1206 aaqq = aatmin
1207 aatmin = temp1
1208 kill = lsvec
1209 lsvec = rsvec
1210 rsvec = kill
1211 IF ( lsvec ) n1 = n
1212*
1213 rowpiv = .true.
1214 END IF
1215*
1216 END IF
1217* END IF L2TRAN
1218*
1219* Scale the matrix so that its maximal singular value remains less
1220* than SQRT(BIG) -- the matrix is scaled so that its maximal column
1221* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
1222* SQRT(BIG) instead of BIG is the fact that ZGEJSV uses LAPACK and
1223* BLAS routines that, in some implementations, are not capable of
1224* working in the full interval [SFMIN,BIG] and that they may provoke
1225* overflows in the intermediate results. If the singular values spread
1226* from SFMIN to BIG, then ZGESVJ will compute them. So, in that case,
1227* one should use ZGESVJ instead of ZGEJSV.
1228* >> change in the April 2016 update: allow bigger range, i.e. the
1229* largest column is allowed up to BIG/N and ZGESVJ will do the rest.
1230 big1 = sqrt( big )
1231 temp1 = sqrt( big / dble(n) )
1232* TEMP1 = BIG/DBLE(N)
1233*
1234 CALL dlascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr )
1235 IF ( aaqq .GT. (aapp * sfmin) ) THEN
1236 aaqq = ( aaqq / aapp ) * temp1
1237 ELSE
1238 aaqq = ( aaqq * temp1 ) / aapp
1239 END IF
1240 temp1 = temp1 * scalem
1241 CALL zlascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr )
1242*
1243* To undo scaling at the end of this procedure, multiply the
1244* computed singular values with USCAL2 / USCAL1.
1245*
1246 uscal1 = temp1
1247 uscal2 = aapp
1248*
1249 IF ( l2kill ) THEN
1250* L2KILL enforces computation of nonzero singular values in
1251* the restricted range of condition number of the initial A,
1252* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
1253 xsc = sqrt( sfmin )
1254 ELSE
1255 xsc = small
1256*
1257* Now, if the condition number of A is too big,
1258* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
1259* as a precaution measure, the full SVD is computed using ZGESVJ
1260* with accumulated Jacobi rotations. This provides numerically
1261* more robust computation, at the cost of slightly increased run
1262* time. Depending on the concrete implementation of BLAS and LAPACK
1263* (i.e. how they behave in presence of extreme ill-conditioning) the
1264* implementor may decide to remove this switch.
1265 IF ( ( aaqq.LT.sqrt(sfmin) ) .AND. lsvec .AND. rsvec ) THEN
1266 jracc = .true.
1267 END IF
1268*
1269 END IF
1270 IF ( aaqq .LT. xsc ) THEN
1271 DO 700 p = 1, n
1272 IF ( sva(p) .LT. xsc ) THEN
1273 CALL zlaset( 'A', m, 1, czero, czero, a(1,p), lda )
1274 sva(p) = zero
1275 END IF
1276 700 CONTINUE
1277 END IF
1278*
1279* Preconditioning using QR factorization with pivoting
1280*
1281 IF ( rowpiv ) THEN
1282* Optional row permutation (Bjoerck row pivoting):
1283* A result by Cox and Higham shows that the Bjoerck's
1284* row pivoting combined with standard column pivoting
1285* has similar effect as Powell-Reid complete pivoting.
1286* The ell-infinity norms of A are made nonincreasing.
1287 IF ( ( lsvec .AND. rsvec ) .AND. .NOT.( jracc ) ) THEN
1288 iwoff = 2*n
1289 ELSE
1290 iwoff = n
1291 END IF
1292 DO 1952 p = 1, m - 1
1293 q = idamax( m-p+1, rwork(m+p), 1 ) + p - 1
1294 iwork(iwoff+p) = q
1295 IF ( p .NE. q ) THEN
1296 temp1 = rwork(m+p)
1297 rwork(m+p) = rwork(m+q)
1298 rwork(m+q) = temp1
1299 END IF
1300 1952 CONTINUE
1301 CALL zlaswp( n, a, lda, 1, m-1, iwork(iwoff+1), 1 )
1302 END IF
1303*
1304* End of the preparation phase (scaling, optional sorting and
1305* transposing, optional flushing of small columns).
1306*
1307* Preconditioning
1308*
1309* If the full SVD is needed, the right singular vectors are computed
1310* from a matrix equation, and for that we need theoretical analysis
1311* of the Businger-Golub pivoting. So we use ZGEQP3 as the first RR QRF.
1312* In all other cases the first RR QRF can be chosen by other criteria
1313* (eg speed by replacing global with restricted window pivoting, such
1314* as in xGEQPX from TOMS # 782). Good results will be obtained using
1315* xGEQPX with properly (!) chosen numerical parameters.
1316* Any improvement of ZGEQP3 improves overall performance of ZGEJSV.
1317*
1318* A * P1 = Q1 * [ R1^* 0]^*:
1319 DO 1963 p = 1, n
1320* .. all columns are free columns
1321 iwork(p) = 0
1322 1963 CONTINUE
1323 CALL zgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lwork-n,
1324 $ rwork, ierr )
1325*
1326* The upper triangular matrix R1 from the first QRF is inspected for
1327* rank deficiency and possibilities for deflation, or possible
1328* ill-conditioning. Depending on the user specified flag L2RANK,
1329* the procedure explores possibilities to reduce the numerical
1330* rank by inspecting the computed upper triangular factor. If
1331* L2RANK or L2ABER are up, then ZGEJSV will compute the SVD of
1332* A + dA, where ||dA|| <= f(M,N)*EPSLN.
1333*
1334 nr = 1
1335 IF ( l2aber ) THEN
1336* Standard absolute error bound suffices. All sigma_i with
1337* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
1338* aggressive enforcement of lower numerical rank by introducing a
1339* backward error of the order of N*EPSLN*||A||.
1340 temp1 = sqrt(dble(n))*epsln
1341 DO 3001 p = 2, n
1342 IF ( abs(a(p,p)) .GE. (temp1*abs(a(1,1))) ) THEN
1343 nr = nr + 1
1344 ELSE
1345 GO TO 3002
1346 END IF
1347 3001 CONTINUE
1348 3002 CONTINUE
1349 ELSE IF ( l2rank ) THEN
1350* .. similarly as above, only slightly more gentle (less aggressive).
1351* Sudden drop on the diagonal of R1 is used as the criterion for
1352* close-to-rank-deficient.
1353 temp1 = sqrt(sfmin)
1354 DO 3401 p = 2, n
1355 IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
1356 $ ( abs(a(p,p)) .LT. small ) .OR.
1357 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3402
1358 nr = nr + 1
1359 3401 CONTINUE
1360 3402 CONTINUE
1361*
1362 ELSE
1363* The goal is high relative accuracy. However, if the matrix
1364* has high scaled condition number the relative accuracy is in
1365* general not feasible. Later on, a condition number estimator
1366* will be deployed to estimate the scaled condition number.
1367* Here we just remove the underflowed part of the triangular
1368* factor. This prevents the situation in which the code is
1369* working hard to get the accuracy not warranted by the data.
1370 temp1 = sqrt(sfmin)
1371 DO 3301 p = 2, n
1372 IF ( ( abs(a(p,p)) .LT. small ) .OR.
1373 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3302
1374 nr = nr + 1
1375 3301 CONTINUE
1376 3302 CONTINUE
1377*
1378 END IF
1379*
1380 almort = .false.
1381 IF ( nr .EQ. n ) THEN
1382 maxprj = one
1383 DO 3051 p = 2, n
1384 temp1 = abs(a(p,p)) / sva(iwork(p))
1385 maxprj = min( maxprj, temp1 )
1386 3051 CONTINUE
1387 IF ( maxprj**2 .GE. one - dble(n)*epsln ) almort = .true.
1388 END IF
1389*
1390*
1391 sconda = - one
1392 condr1 = - one
1393 condr2 = - one
1394*
1395 IF ( errest ) THEN
1396 IF ( n .EQ. nr ) THEN
1397 IF ( rsvec ) THEN
1398* .. V is available as workspace
1399 CALL zlacpy( 'U', n, n, a, lda, v, ldv )
1400 DO 3053 p = 1, n
1401 temp1 = sva(iwork(p))
1402 CALL zdscal( p, one/temp1, v(1,p), 1 )
1403 3053 CONTINUE
1404 IF ( lsvec )THEN
1405 CALL zpocon( 'U', n, v, ldv, one, temp1,
1406 $ cwork(n+1), rwork, ierr )
1407 ELSE
1408 CALL zpocon( 'U', n, v, ldv, one, temp1,
1409 $ cwork, rwork, ierr )
1410 END IF
1411*
1412 ELSE IF ( lsvec ) THEN
1413* .. U is available as workspace
1414 CALL zlacpy( 'U', n, n, a, lda, u, ldu )
1415 DO 3054 p = 1, n
1416 temp1 = sva(iwork(p))
1417 CALL zdscal( p, one/temp1, u(1,p), 1 )
1418 3054 CONTINUE
1419 CALL zpocon( 'U', n, u, ldu, one, temp1,
1420 $ cwork(n+1), rwork, ierr )
1421 ELSE
1422 CALL zlacpy( 'U', n, n, a, lda, cwork, n )
1423*[] CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N )
1424* Change: here index shifted by N to the left, CWORK(1:N)
1425* not needed for SIGMA only computation
1426 DO 3052 p = 1, n
1427 temp1 = sva(iwork(p))
1428*[] CALL ZDSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 )
1429 CALL zdscal( p, one/temp1, cwork((p-1)*n+1), 1 )
1430 3052 CONTINUE
1431* .. the columns of R are scaled to have unit Euclidean lengths.
1432*[] CALL ZPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1,
1433*[] $ CWORK(N+N*N+1), RWORK, IERR )
1434 CALL zpocon( 'U', n, cwork, n, one, temp1,
1435 $ cwork(n*n+1), rwork, ierr )
1436*
1437 END IF
1438 IF ( temp1 .NE. zero ) THEN
1439 sconda = one / sqrt(temp1)
1440 ELSE
1441 sconda = - one
1442 END IF
1443* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
1444* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
1445 ELSE
1446 sconda = - one
1447 END IF
1448 END IF
1449*
1450 l2pert = l2pert .AND. ( abs( a(1,1)/a(nr,nr) ) .GT. sqrt(big1) )
1451* If there is no violent scaling, artificial perturbation is not needed.
1452*
1453* Phase 3:
1454*
1455 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
1456*
1457* Singular Values only
1458*
1459* .. transpose A(1:NR,1:N)
1460 DO 1946 p = 1, min( n-1, nr )
1461 CALL zcopy( n-p, a(p,p+1), lda, a(p+1,p), 1 )
1462 CALL zlacgv( n-p+1, a(p,p), 1 )
1463 1946 CONTINUE
1464 IF ( nr .EQ. n ) a(n,n) = conjg(a(n,n))
1465*
1466* The following two DO-loops introduce small relative perturbation
1467* into the strict upper triangle of the lower triangular matrix.
1468* Small entries below the main diagonal are also changed.
1469* This modification is useful if the computing environment does not
1470* provide/allow FLUSH TO ZERO underflow, for it prevents many
1471* annoying denormalized numbers in case of strongly scaled matrices.
1472* The perturbation is structured so that it does not introduce any
1473* new perturbation of the singular values, and it does not destroy
1474* the job done by the preconditioner.
1475* The licence for this perturbation is in the variable L2PERT, which
1476* should be .FALSE. if FLUSH TO ZERO underflow is active.
1477*
1478 IF ( .NOT. almort ) THEN
1479*
1480 IF ( l2pert ) THEN
1481* XSC = SQRT(SMALL)
1482 xsc = epsln / dble(n)
1483 DO 4947 q = 1, nr
1484 ctemp = dcmplx(xsc*abs(a(q,q)),zero)
1485 DO 4949 p = 1, n
1486 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1487 $ .OR. ( p .LT. q ) )
1488* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1489 $ a(p,q) = ctemp
1490 4949 CONTINUE
1491 4947 CONTINUE
1492 ELSE
1493 CALL zlaset( 'U', nr-1,nr-1, czero,czero, a(1,2),lda )
1494 END IF
1495*
1496* .. second preconditioning using the QR factorization
1497*
1498 CALL zgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr )
1499*
1500* .. and transpose upper to lower triangular
1501 DO 1948 p = 1, nr - 1
1502 CALL zcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 )
1503 CALL zlacgv( nr-p+1, a(p,p), 1 )
1504 1948 CONTINUE
1505*
1506 END IF
1507*
1508* Row-cyclic Jacobi SVD algorithm with column pivoting
1509*
1510* .. again some perturbation (a "background noise") is added
1511* to drown denormals
1512 IF ( l2pert ) THEN
1513* XSC = SQRT(SMALL)
1514 xsc = epsln / dble(n)
1515 DO 1947 q = 1, nr
1516 ctemp = dcmplx(xsc*abs(a(q,q)),zero)
1517 DO 1949 p = 1, nr
1518 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1519 $ .OR. ( p .LT. q ) )
1520* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1521 $ a(p,q) = ctemp
1522 1949 CONTINUE
1523 1947 CONTINUE
1524 ELSE
1525 CALL zlaset( 'U', nr-1, nr-1, czero, czero, a(1,2), lda )
1526 END IF
1527*
1528* .. and one-sided Jacobi rotations are started on a lower
1529* triangular matrix (plus perturbation which is ignored in
1530* the part which destroys triangular form (confusing?!))
1531*
1532 CALL zgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,
1533 $ n, v, ldv, cwork, lwork, rwork, lrwork, info )
1534*
1535 scalem = rwork(1)
1536 numrank = nint(rwork(2))
1537*
1538*
1539 ELSE IF ( ( rsvec .AND. ( .NOT. lsvec ) .AND. ( .NOT. jracc ) )
1540 $ .OR.
1541 $ ( jracc .AND. ( .NOT. lsvec ) .AND. ( nr .NE. n ) ) ) THEN
1542*
1543* -> Singular Values and Right Singular Vectors <-
1544*
1545 IF ( almort ) THEN
1546*
1547* .. in this case NR equals N
1548 DO 1998 p = 1, nr
1549 CALL zcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1550 CALL zlacgv( n-p+1, v(p,p), 1 )
1551 1998 CONTINUE
1552 CALL zlaset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1553*
1554 CALL zgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,
1555 $ cwork, lwork, rwork, lrwork, info )
1556 scalem = rwork(1)
1557 numrank = nint(rwork(2))
1558
1559 ELSE
1560*
1561* .. two more QR factorizations ( one QRF is not enough, two require
1562* accumulated product of Jacobi rotations, three are perfect )
1563*
1564 CALL zlaset( 'L', nr-1,nr-1, czero, czero, a(2,1), lda )
1565 CALL zgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr)
1566 CALL zlacpy( 'L', nr, nr, a, lda, v, ldv )
1567 CALL zlaset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1568 CALL zgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1569 $ lwork-2*n, ierr )
1570 DO 8998 p = 1, nr
1571 CALL zcopy( nr-p+1, v(p,p), ldv, v(p,p), 1 )
1572 CALL zlacgv( nr-p+1, v(p,p), 1 )
1573 8998 CONTINUE
1574 CALL zlaset('U', nr-1, nr-1, czero, czero, v(1,2), ldv)
1575*
1576 CALL zgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,
1577 $ ldu, cwork(n+1), lwork-n, rwork, lrwork, info )
1578 scalem = rwork(1)
1579 numrank = nint(rwork(2))
1580 IF ( nr .LT. n ) THEN
1581 CALL zlaset( 'A',n-nr, nr, czero,czero, v(nr+1,1), ldv )
1582 CALL zlaset( 'A',nr, n-nr, czero,czero, v(1,nr+1), ldv )
1583 CALL zlaset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv )
1584 END IF
1585*
1586 CALL zunmlq( 'L', 'C', n, n, nr, a, lda, cwork,
1587 $ v, ldv, cwork(n+1), lwork-n, ierr )
1588*
1589 END IF
1590* .. permute the rows of V
1591* DO 8991 p = 1, N
1592* CALL ZCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA )
1593* 8991 CONTINUE
1594* CALL ZLACPY( 'All', N, N, A, LDA, V, LDV )
1595 CALL zlapmr( .false., n, n, v, ldv, iwork )
1596*
1597 IF ( transp ) THEN
1598 CALL zlacpy( 'A', n, n, v, ldv, u, ldu )
1599 END IF
1600*
1601 ELSE IF ( jracc .AND. (.NOT. lsvec) .AND. ( nr.EQ. n ) ) THEN
1602*
1603 CALL zlaset( 'L', n-1,n-1, czero, czero, a(2,1), lda )
1604*
1605 CALL zgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,
1606 $ cwork, lwork, rwork, lrwork, info )
1607 scalem = rwork(1)
1608 numrank = nint(rwork(2))
1609 CALL zlapmr( .false., n, n, v, ldv, iwork )
1610*
1611 ELSE IF ( lsvec .AND. ( .NOT. rsvec ) ) THEN
1612*
1613* .. Singular Values and Left Singular Vectors ..
1614*
1615* .. second preconditioning step to avoid need to accumulate
1616* Jacobi rotations in the Jacobi iterations.
1617 DO 1965 p = 1, nr
1618 CALL zcopy( n-p+1, a(p,p), lda, u(p,p), 1 )
1619 CALL zlacgv( n-p+1, u(p,p), 1 )
1620 1965 CONTINUE
1621 CALL zlaset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1622*
1623 CALL zgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2*n+1),
1624 $ lwork-2*n, ierr )
1625*
1626 DO 1967 p = 1, nr - 1
1627 CALL zcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 )
1628 CALL zlacgv( n-p+1, u(p,p), 1 )
1629 1967 CONTINUE
1630 CALL zlaset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1631*
1632 CALL zgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,
1633 $ lda, cwork(n+1), lwork-n, rwork, lrwork, info )
1634 scalem = rwork(1)
1635 numrank = nint(rwork(2))
1636*
1637 IF ( nr .LT. m ) THEN
1638 CALL zlaset( 'A', m-nr, nr,czero, czero, u(nr+1,1), ldu )
1639 IF ( nr .LT. n1 ) THEN
1640 CALL zlaset( 'A',nr, n1-nr, czero, czero, u(1,nr+1),ldu )
1641 CALL zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu )
1642 END IF
1643 END IF
1644*
1645 CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1646 $ ldu, cwork(n+1), lwork-n, ierr )
1647*
1648 IF ( rowpiv )
1649 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
1650*
1651 DO 1974 p = 1, n1
1652 xsc = one / dznrm2( m, u(1,p), 1 )
1653 CALL zdscal( m, xsc, u(1,p), 1 )
1654 1974 CONTINUE
1655*
1656 IF ( transp ) THEN
1657 CALL zlacpy( 'A', n, n, u, ldu, v, ldv )
1658 END IF
1659*
1660 ELSE
1661*
1662* .. Full SVD ..
1663*
1664 IF ( .NOT. jracc ) THEN
1665*
1666 IF ( .NOT. almort ) THEN
1667*
1668* Second Preconditioning Step (QRF [with pivoting])
1669* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
1670* equivalent to an LQF CALL. Since in many libraries the QRF
1671* seems to be better optimized than the LQF, we do explicit
1672* transpose and use the QRF. This is subject to changes in an
1673* optimized implementation of ZGEJSV.
1674*
1675 DO 1968 p = 1, nr
1676 CALL zcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1677 CALL zlacgv( n-p+1, v(p,p), 1 )
1678 1968 CONTINUE
1679*
1680* .. the following two loops perturb small entries to avoid
1681* denormals in the second QR factorization, where they are
1682* as good as zeros. This is done to avoid painfully slow
1683* computation with denormals. The relative size of the perturbation
1684* is a parameter that can be changed by the implementer.
1685* This perturbation device will be obsolete on machines with
1686* properly implemented arithmetic.
1687* To switch it off, set L2PERT=.FALSE. To remove it from the
1688* code, remove the action under L2PERT=.TRUE., leave the ELSE part.
1689* The following two loops should be blocked and fused with the
1690* transposed copy above.
1691*
1692 IF ( l2pert ) THEN
1693 xsc = sqrt(small)
1694 DO 2969 q = 1, nr
1695 ctemp = dcmplx(xsc*abs( v(q,q) ),zero)
1696 DO 2968 p = 1, n
1697 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1698 $ .OR. ( p .LT. q ) )
1699* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
1700 $ v(p,q) = ctemp
1701 IF ( p .LT. q ) v(p,q) = - v(p,q)
1702 2968 CONTINUE
1703 2969 CONTINUE
1704 ELSE
1705 CALL zlaset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
1706 END IF
1707*
1708* Estimate the row scaled condition number of R1
1709* (If R1 is rectangular, N > NR, then the condition number
1710* of the leading NR x NR submatrix is estimated.)
1711*
1712 CALL zlacpy( 'L', nr, nr, v, ldv, cwork(2*n+1), nr )
1713 DO 3950 p = 1, nr
1714 temp1 = dznrm2(nr-p+1,cwork(2*n+(p-1)*nr+p),1)
1715 CALL zdscal(nr-p+1,one/temp1,cwork(2*n+(p-1)*nr+p),1)
1716 3950 CONTINUE
1717 CALL zpocon('L',nr,cwork(2*n+1),nr,one,temp1,
1718 $ cwork(2*n+nr*nr+1),rwork,ierr)
1719 condr1 = one / sqrt(temp1)
1720* .. here need a second opinion on the condition number
1721* .. then assume worst case scenario
1722* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N)
1723* more conservative <=> CONDR1 .LT. SQRT(DBLE(N))
1724*
1725 cond_ok = sqrt(sqrt(dble(nr)))
1726*[TP] COND_OK is a tuning parameter.
1727*
1728 IF ( condr1 .LT. cond_ok ) THEN
1729* .. the second QRF without pivoting. Note: in an optimized
1730* implementation, this QRF should be implemented as the QRF
1731* of a lower triangular matrix.
1732* R1^* = Q2 * R2
1733 CALL zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1734 $ lwork-2*n, ierr )
1735*
1736 IF ( l2pert ) THEN
1737 xsc = sqrt(small)/epsln
1738 DO 3959 p = 2, nr
1739 DO 3958 q = 1, p - 1
1740 ctemp=dcmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1741 $ zero)
1742 IF ( abs(v(q,p)) .LE. temp1 )
1743* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1744 $ v(q,p) = ctemp
1745 3958 CONTINUE
1746 3959 CONTINUE
1747 END IF
1748*
1749 IF ( nr .NE. n )
1750 $ CALL zlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1751* .. save ...
1752*
1753* .. this transposed copy should be better than naive
1754 DO 1969 p = 1, nr - 1
1755 CALL zcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 )
1756 CALL zlacgv(nr-p+1, v(p,p), 1 )
1757 1969 CONTINUE
1758 v(nr,nr)=conjg(v(nr,nr))
1759*
1760 condr2 = condr1
1761*
1762 ELSE
1763*
1764* .. ill-conditioned case: second QRF with pivoting
1765* Note that windowed pivoting would be equally good
1766* numerically, and more run-time efficient. So, in
1767* an optimal implementation, the next call to ZGEQP3
1768* should be replaced with eg. CALL ZGEQPX (ACM TOMS #782)
1769* with properly (carefully) chosen parameters.
1770*
1771* R1^* * P2 = Q2 * R2
1772 DO 3003 p = 1, nr
1773 iwork(n+p) = 0
1774 3003 CONTINUE
1775 CALL zgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),
1776 $ cwork(2*n+1), lwork-2*n, rwork, ierr )
1777** CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
1778** $ LWORK-2*N, IERR )
1779 IF ( l2pert ) THEN
1780 xsc = sqrt(small)
1781 DO 3969 p = 2, nr
1782 DO 3968 q = 1, p - 1
1783 ctemp=dcmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1784 $ zero)
1785 IF ( abs(v(q,p)) .LE. temp1 )
1786* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1787 $ v(q,p) = ctemp
1788 3968 CONTINUE
1789 3969 CONTINUE
1790 END IF
1791*
1792 CALL zlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1793*
1794 IF ( l2pert ) THEN
1795 xsc = sqrt(small)
1796 DO 8970 p = 2, nr
1797 DO 8971 q = 1, p - 1
1798 ctemp=dcmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1799 $ zero)
1800* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) )
1801 v(p,q) = - ctemp
1802 8971 CONTINUE
1803 8970 CONTINUE
1804 ELSE
1805 CALL zlaset( 'L',nr-1,nr-1,czero,czero,v(2,1),ldv )
1806 END IF
1807* Now, compute R2 = L3 * Q3, the LQ factorization.
1808 CALL zgelqf( nr, nr, v, ldv, cwork(2*n+n*nr+1),
1809 $ cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, ierr )
1810* .. and estimate the condition number
1811 CALL zlacpy( 'L',nr,nr,v,ldv,cwork(2*n+n*nr+nr+1),nr )
1812 DO 4950 p = 1, nr
1813 temp1 = dznrm2( p, cwork(2*n+n*nr+nr+p), nr )
1814 CALL zdscal( p, one/temp1, cwork(2*n+n*nr+nr+p), nr )
1815 4950 CONTINUE
1816 CALL zpocon( 'L',nr,cwork(2*n+n*nr+nr+1),nr,one,temp1,
1817 $ cwork(2*n+n*nr+nr+nr*nr+1),rwork,ierr )
1818 condr2 = one / sqrt(temp1)
1819*
1820*
1821 IF ( condr2 .GE. cond_ok ) THEN
1822* .. save the Householder vectors used for Q3
1823* (this overwrites the copy of R2, as it will not be
1824* needed in this branch, but it does not overwritte the
1825* Huseholder vectors of Q2.).
1826 CALL zlacpy( 'U', nr, nr, v, ldv, cwork(2*n+1), n )
1827* .. and the rest of the information on Q3 is in
1828* WORK(2*N+N*NR+1:2*N+N*NR+N)
1829 END IF
1830*
1831 END IF
1832*
1833 IF ( l2pert ) THEN
1834 xsc = sqrt(small)
1835 DO 4968 q = 2, nr
1836 ctemp = xsc * v(q,q)
1837 DO 4969 p = 1, q - 1
1838* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) )
1839 v(p,q) = - ctemp
1840 4969 CONTINUE
1841 4968 CONTINUE
1842 ELSE
1843 CALL zlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv )
1844 END IF
1845*
1846* Second preconditioning finished; continue with Jacobi SVD
1847* The input matrix is lower trinagular.
1848*
1849* Recover the right singular vectors as solution of a well
1850* conditioned triangular matrix equation.
1851*
1852 IF ( condr1 .LT. cond_ok ) THEN
1853*
1854 CALL zgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,
1855 $ cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,rwork,
1856 $ lrwork, info )
1857 scalem = rwork(1)
1858 numrank = nint(rwork(2))
1859 DO 3970 p = 1, nr
1860 CALL zcopy( nr, v(1,p), 1, u(1,p), 1 )
1861 CALL zdscal( nr, sva(p), v(1,p), 1 )
1862 3970 CONTINUE
1863
1864* .. pick the right matrix equation and solve it
1865*
1866 IF ( nr .EQ. n ) THEN
1867* :)) .. best case, R1 is inverted. The solution of this matrix
1868* equation is Q2*V2 = the product of the Jacobi rotations
1869* used in ZGESVJ, premultiplied with the orthogonal matrix
1870* from the second QR factorization.
1871 CALL ztrsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv)
1872 ELSE
1873* .. R1 is well conditioned, but non-square. Adjoint of R2
1874* is inverted to get the product of the Jacobi rotations
1875* used in ZGESVJ. The Q-factor from the second QR
1876* factorization is then built in explicitly.
1877 CALL ztrsm('L','U','C','N',nr,nr,cone,cwork(2*n+1),
1878 $ n,v,ldv)
1879 IF ( nr .LT. n ) THEN
1880 CALL zlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv)
1881 CALL zlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv)
1882 CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1883 END IF
1884 CALL zunmqr('L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1885 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr)
1886 END IF
1887*
1888 ELSE IF ( condr2 .LT. cond_ok ) THEN
1889*
1890* The matrix R2 is inverted. The solution of the matrix equation
1891* is Q3^* * V3 = the product of the Jacobi rotations (appplied to
1892* the lower triangular L3 from the LQ factorization of
1893* R2=L3*Q3), pre-multiplied with the transposed Q3.
1894 CALL zgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,
1895 $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1896 $ rwork, lrwork, info )
1897 scalem = rwork(1)
1898 numrank = nint(rwork(2))
1899 DO 3870 p = 1, nr
1900 CALL zcopy( nr, v(1,p), 1, u(1,p), 1 )
1901 CALL zdscal( nr, sva(p), u(1,p), 1 )
1902 3870 CONTINUE
1903 CALL ztrsm('L','U','N','N',nr,nr,cone,cwork(2*n+1),n,
1904 $ u,ldu)
1905* .. apply the permutation from the second QR factorization
1906 DO 873 q = 1, nr
1907 DO 872 p = 1, nr
1908 cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1909 872 CONTINUE
1910 DO 874 p = 1, nr
1911 u(p,q) = cwork(2*n+n*nr+nr+p)
1912 874 CONTINUE
1913 873 CONTINUE
1914 IF ( nr .LT. n ) THEN
1915 CALL zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
1916 CALL zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
1917 CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1918 END IF
1919 CALL zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1920 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1921 ELSE
1922* Last line of defense.
1923* #:( This is a rather pathological case: no scaled condition
1924* improvement after two pivoted QR factorizations. Other
1925* possibility is that the rank revealing QR factorization
1926* or the condition estimator has failed, or the COND_OK
1927* is set very close to ONE (which is unnecessary). Normally,
1928* this branch should never be executed, but in rare cases of
1929* failure of the RRQR or condition estimator, the last line of
1930* defense ensures that ZGEJSV completes the task.
1931* Compute the full SVD of L3 using ZGESVJ with explicit
1932* accumulation of Jacobi rotations.
1933 CALL zgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,
1934 $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1935 $ rwork, lrwork, info )
1936 scalem = rwork(1)
1937 numrank = nint(rwork(2))
1938 IF ( nr .LT. n ) THEN
1939 CALL zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
1940 CALL zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
1941 CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1942 END IF
1943 CALL zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1944 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1945*
1946 CALL zunmlq( 'L', 'C', nr, nr, nr, cwork(2*n+1), n,
1947 $ cwork(2*n+n*nr+1), u, ldu, cwork(2*n+n*nr+nr+1),
1948 $ lwork-2*n-n*nr-nr, ierr )
1949 DO 773 q = 1, nr
1950 DO 772 p = 1, nr
1951 cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1952 772 CONTINUE
1953 DO 774 p = 1, nr
1954 u(p,q) = cwork(2*n+n*nr+nr+p)
1955 774 CONTINUE
1956 773 CONTINUE
1957*
1958 END IF
1959*
1960* Permute the rows of V using the (column) permutation from the
1961* first QRF. Also, scale the columns to make them unit in
1962* Euclidean norm. This applies to all cases.
1963*
1964 temp1 = sqrt(dble(n)) * epsln
1965 DO 1972 q = 1, n
1966 DO 972 p = 1, n
1967 cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
1968 972 CONTINUE
1969 DO 973 p = 1, n
1970 v(p,q) = cwork(2*n+n*nr+nr+p)
1971 973 CONTINUE
1972 xsc = one / dznrm2( n, v(1,q), 1 )
1973 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1974 $ CALL zdscal( n, xsc, v(1,q), 1 )
1975 1972 CONTINUE
1976* At this moment, V contains the right singular vectors of A.
1977* Next, assemble the left singular vector matrix U (M x N).
1978 IF ( nr .LT. m ) THEN
1979 CALL zlaset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu)
1980 IF ( nr .LT. n1 ) THEN
1981 CALL zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1982 CALL zlaset('A',m-nr,n1-nr,czero,cone,
1983 $ u(nr+1,nr+1),ldu)
1984 END IF
1985 END IF
1986*
1987* The Q matrix from the first QRF is built into the left singular
1988* matrix U. This applies to all cases.
1989*
1990 CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1991 $ ldu, cwork(n+1), lwork-n, ierr )
1992
1993* The columns of U are normalized. The cost is O(M*N) flops.
1994 temp1 = sqrt(dble(m)) * epsln
1995 DO 1973 p = 1, nr
1996 xsc = one / dznrm2( m, u(1,p), 1 )
1997 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1998 $ CALL zdscal( m, xsc, u(1,p), 1 )
1999 1973 CONTINUE
2000*
2001* If the initial QRF is computed with row pivoting, the left
2002* singular vectors must be adjusted.
2003*
2004 IF ( rowpiv )
2005 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2006*
2007 ELSE
2008*
2009* .. the initial matrix A has almost orthogonal columns and
2010* the second QRF is not needed
2011*
2012 CALL zlacpy( 'U', n, n, a, lda, cwork(n+1), n )
2013 IF ( l2pert ) THEN
2014 xsc = sqrt(small)
2015 DO 5970 p = 2, n
2016 ctemp = xsc * cwork( n + (p-1)*n + p )
2017 DO 5971 q = 1, p - 1
2018* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) /
2019* $ ABS(CWORK(N+(p-1)*N+q)) )
2020 cwork(n+(q-1)*n+p)=-ctemp
2021 5971 CONTINUE
2022 5970 CONTINUE
2023 ELSE
2024 CALL zlaset( 'L',n-1,n-1,czero,czero,cwork(n+2),n )
2025 END IF
2026*
2027 CALL zgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,
2028 $ n, u, ldu, cwork(n+n*n+1), lwork-n-n*n, rwork, lrwork,
2029 $ info )
2030*
2031 scalem = rwork(1)
2032 numrank = nint(rwork(2))
2033 DO 6970 p = 1, n
2034 CALL zcopy( n, cwork(n+(p-1)*n+1), 1, u(1,p), 1 )
2035 CALL zdscal( n, sva(p), cwork(n+(p-1)*n+1), 1 )
2036 6970 CONTINUE
2037*
2038 CALL ztrsm( 'L', 'U', 'N', 'N', n, n,
2039 $ cone, a, lda, cwork(n+1), n )
2040 DO 6972 p = 1, n
2041 CALL zcopy( n, cwork(n+p), n, v(iwork(p),1), ldv )
2042 6972 CONTINUE
2043 temp1 = sqrt(dble(n))*epsln
2044 DO 6971 p = 1, n
2045 xsc = one / dznrm2( n, v(1,p), 1 )
2046 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2047 $ CALL zdscal( n, xsc, v(1,p), 1 )
2048 6971 CONTINUE
2049*
2050* Assemble the left singular vector matrix U (M x N).
2051*
2052 IF ( n .LT. m ) THEN
2053 CALL zlaset( 'A', m-n, n, czero, czero, u(n+1,1), ldu )
2054 IF ( n .LT. n1 ) THEN
2055 CALL zlaset('A',n, n1-n, czero, czero, u(1,n+1),ldu)
2056 CALL zlaset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu)
2057 END IF
2058 END IF
2059 CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2060 $ ldu, cwork(n+1), lwork-n, ierr )
2061 temp1 = sqrt(dble(m))*epsln
2062 DO 6973 p = 1, n1
2063 xsc = one / dznrm2( m, u(1,p), 1 )
2064 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2065 $ CALL zdscal( m, xsc, u(1,p), 1 )
2066 6973 CONTINUE
2067*
2068 IF ( rowpiv )
2069 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2070*
2071 END IF
2072*
2073* end of the >> almost orthogonal case << in the full SVD
2074*
2075 ELSE
2076*
2077* This branch deploys a preconditioned Jacobi SVD with explicitly
2078* accumulated rotations. It is included as optional, mainly for
2079* experimental purposes. It does perform well, and can also be used.
2080* In this implementation, this branch will be automatically activated
2081* if the condition number sigma_max(A) / sigma_min(A) is predicted
2082* to be greater than the overflow threshold. This is because the
2083* a posteriori computation of the singular vectors assumes robust
2084* implementation of BLAS and some LAPACK procedures, capable of working
2085* in presence of extreme values, e.g. when the singular values spread from
2086* the underflow to the overflow threshold.
2087*
2088 DO 7968 p = 1, nr
2089 CALL zcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
2090 CALL zlacgv( n-p+1, v(p,p), 1 )
2091 7968 CONTINUE
2092*
2093 IF ( l2pert ) THEN
2094 xsc = sqrt(small/epsln)
2095 DO 5969 q = 1, nr
2096 ctemp = dcmplx(xsc*abs( v(q,q) ),zero)
2097 DO 5968 p = 1, n
2098 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
2099 $ .OR. ( p .LT. q ) )
2100* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
2101 $ v(p,q) = ctemp
2102 IF ( p .LT. q ) v(p,q) = - v(p,q)
2103 5968 CONTINUE
2104 5969 CONTINUE
2105 ELSE
2106 CALL zlaset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
2107 END IF
2108
2109 CALL zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
2110 $ lwork-2*n, ierr )
2111 CALL zlacpy( 'L', n, nr, v, ldv, cwork(2*n+1), n )
2112*
2113 DO 7969 p = 1, nr
2114 CALL zcopy( nr-p+1, v(p,p), ldv, u(p,p), 1 )
2115 CALL zlacgv( nr-p+1, u(p,p), 1 )
2116 7969 CONTINUE
2117
2118 IF ( l2pert ) THEN
2119 xsc = sqrt(small/epsln)
2120 DO 9970 q = 2, nr
2121 DO 9971 p = 1, q - 1
2122 ctemp = dcmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),
2123 $ zero)
2124* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) )
2125 u(p,q) = - ctemp
2126 9971 CONTINUE
2127 9970 CONTINUE
2128 ELSE
2129 CALL zlaset('U', nr-1, nr-1, czero, czero, u(1,2), ldu )
2130 END IF
2131
2132 CALL zgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,
2133 $ n, v, ldv, cwork(2*n+n*nr+1), lwork-2*n-n*nr,
2134 $ rwork, lrwork, info )
2135 scalem = rwork(1)
2136 numrank = nint(rwork(2))
2137
2138 IF ( nr .LT. n ) THEN
2139 CALL zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
2140 CALL zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
2141 CALL zlaset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv )
2142 END IF
2143
2144 CALL zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
2145 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
2146*
2147* Permute the rows of V using the (column) permutation from the
2148* first QRF. Also, scale the columns to make them unit in
2149* Euclidean norm. This applies to all cases.
2150*
2151 temp1 = sqrt(dble(n)) * epsln
2152 DO 7972 q = 1, n
2153 DO 8972 p = 1, n
2154 cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
2155 8972 CONTINUE
2156 DO 8973 p = 1, n
2157 v(p,q) = cwork(2*n+n*nr+nr+p)
2158 8973 CONTINUE
2159 xsc = one / dznrm2( n, v(1,q), 1 )
2160 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2161 $ CALL zdscal( n, xsc, v(1,q), 1 )
2162 7972 CONTINUE
2163*
2164* At this moment, V contains the right singular vectors of A.
2165* Next, assemble the left singular vector matrix U (M x N).
2166*
2167 IF ( nr .LT. m ) THEN
2168 CALL zlaset( 'A', m-nr, nr, czero, czero, u(nr+1,1), ldu )
2169 IF ( nr .LT. n1 ) THEN
2170 CALL zlaset('A',nr, n1-nr, czero, czero, u(1,nr+1),ldu)
2171 CALL zlaset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu)
2172 END IF
2173 END IF
2174*
2175 CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2176 $ ldu, cwork(n+1), lwork-n, ierr )
2177*
2178 IF ( rowpiv )
2179 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2180*
2181*
2182 END IF
2183 IF ( transp ) THEN
2184* .. swap U and V because the procedure worked on A^*
2185 DO 6974 p = 1, n
2186 CALL zswap( n, u(1,p), 1, v(1,p), 1 )
2187 6974 CONTINUE
2188 END IF
2189*
2190 END IF
2191* end of the full SVD
2192*
2193* Undo scaling, if necessary (and possible)
2194*
2195 IF ( uscal2 .LE. (big/sva(1))*uscal1 ) THEN
2196 CALL dlascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr )
2197 uscal1 = one
2198 uscal2 = one
2199 END IF
2200*
2201 IF ( nr .LT. n ) THEN
2202 DO 3004 p = nr+1, n
2203 sva(p) = zero
2204 3004 CONTINUE
2205 END IF
2206*
2207 rwork(1) = uscal2 * scalem
2208 rwork(2) = uscal1
2209 IF ( errest ) rwork(3) = sconda
2210 IF ( lsvec .AND. rsvec ) THEN
2211 rwork(4) = condr1
2212 rwork(5) = condr2
2213 END IF
2214 IF ( l2tran ) THEN
2215 rwork(6) = entra
2216 rwork(7) = entrat
2217 END IF
2218*
2219 iwork(1) = nr
2220 iwork(2) = numrank
2221 iwork(3) = warning
2222 IF ( transp ) THEN
2223 iwork(4) = 1
2224 ELSE
2225 iwork(4) = -1
2226 END IF
2227
2228*
2229 RETURN
2230* ..
2231* .. END OF ZGEJSV
2232* ..
subroutine dlassq(n, x, incx, scl, sumsq)
DLASSQ updates a sum of squares represented in scaled form.
Definition dlassq.f90:137
subroutine zlassq(n, x, incx, scl, sumsq)
ZLASSQ updates a sum of squares represented in scaled form.
Definition zlassq.f90:137
subroutine dlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition dlascl.f:143
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function idamax(n, dx, incx)
IDAMAX
Definition idamax.f:71
integer function izamax(n, zx, incx)
IZAMAX
Definition izamax.f:71
subroutine zgelqf(m, n, a, lda, tau, work, lwork, info)
ZGELQF
Definition zgelqf.f:143
subroutine zgesvj(joba, jobu, jobv, m, n, a, lda, sva, mv, v, ldv, cwork, lwork, rwork, lrwork, info)
ZGESVJ
Definition zgesvj.f:351
subroutine zgeqp3(m, n, a, lda, jpvt, tau, work, lwork, rwork, info)
ZGEQP3
Definition zgeqp3.f:159
subroutine zlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition zlascl.f:143
subroutine zlacpy(uplo, m, n, a, lda, b, ldb)
ZLACPY copies all or part of one two-dimensional array to another.
Definition zlacpy.f:103
subroutine zlapmr(forwrd, m, n, x, ldx, k)
ZLAPMR rearranges rows of a matrix as specified by a permutation vector.
Definition zlapmr.f:104
subroutine zlaswp(n, a, lda, k1, k2, ipiv, incx)
ZLASWP performs a series of row interchanges on a general rectangular matrix.
Definition zlaswp.f:115
subroutine zlacgv(n, x, incx)
ZLACGV conjugates a complex vector.
Definition zlacgv.f:74
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:106
subroutine zunmlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
ZUNMLQ
Definition zunmlq.f:167
subroutine zunmqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
ZUNMQR
Definition zunmqr.f:167
subroutine zungqr(m, n, k, a, lda, tau, work, lwork, info)
ZUNGQR
Definition zungqr.f:128
subroutine zpocon(uplo, n, a, lda, anorm, rcond, work, rwork, info)
ZPOCON
Definition zpocon.f:121
subroutine zdscal(n, da, zx, incx)
ZDSCAL
Definition zdscal.f:78
subroutine zswap(n, zx, incx, zy, incy)
ZSWAP
Definition zswap.f:81
subroutine zcopy(n, zx, incx, zy, incy)
ZCOPY
Definition zcopy.f:81
subroutine ztrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
ZTRSM
Definition ztrsm.f:180
subroutine dscal(n, da, dx, incx)
DSCAL
Definition dscal.f:79
real(wp) function dznrm2(n, x, incx)
DZNRM2
Definition dznrm2.f90:90
subroutine zgeqrf(m, n, a, lda, tau, work, lwork, info)
ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
Definition zgeqrf.f:151
double precision function dlamch(cmach)
DLAMCH
Definition dlamch.f:69
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21

◆ zgesdd()

subroutine zgesdd ( character jobz,
integer m,
integer n,
complex*16, dimension( lda, * ) a,
integer lda,
double precision, dimension( * ) s,
complex*16, dimension( ldu, * ) u,
integer ldu,
complex*16, dimension( ldvt, * ) vt,
integer ldvt,
complex*16, dimension( * ) work,
integer lwork,
double precision, dimension( * ) rwork,
integer, dimension( * ) iwork,
integer info )

ZGESDD

Download ZGESDD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> ZGESDD computes the singular value decomposition (SVD) of a complex
!> M-by-N matrix A, optionally computing the left and/or right singular
!> vectors, by using divide-and-conquer method. The SVD is written
!>
!>      A = U * SIGMA * conjugate-transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
!> V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns VT = V**H, not V.
!>
!> The divide and conquer algorithm makes very mild assumptions about
!> floating point arithmetic. It will work on machines with a guard
!> digit in add/subtract, or on those binary machines without guard
!> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
!> Cray-2. It could conceivably fail on hexadecimal or decimal machines
!> without guard digits, but we know of none.
!> 
Parameters
[in]JOBZ
!>          JOBZ is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'A':  all M columns of U and all N rows of V**H are
!>                  returned in the arrays U and VT;
!>          = 'S':  the first min(M,N) columns of U and the first
!>                  min(M,N) rows of V**H are returned in the arrays U
!>                  and VT;
!>          = 'O':  If M >= N, the first N columns of U are overwritten
!>                  in the array A and all rows of V**H are returned in
!>                  the array VT;
!>                  otherwise, all columns of U are returned in the
!>                  array U and the first M rows of V**H are overwritten
!>                  in the array A;
!>          = 'N':  no columns of U or rows of V**H are computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is COMPLEX*16 array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit,
!>          if JOBZ = 'O',  A is overwritten with the first N columns
!>                          of U (the left singular vectors, stored
!>                          columnwise) if M >= N;
!>                          A is overwritten with the first M rows
!>                          of V**H (the right singular vectors, stored
!>                          rowwise) otherwise.
!>          if JOBZ .ne. 'O', the contents of A are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is DOUBLE PRECISION array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is COMPLEX*16 array, dimension (LDU,UCOL)
!>          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
!>          UCOL = min(M,N) if JOBZ = 'S'.
!>          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
!>          unitary matrix U;
!>          if JOBZ = 'S', U contains the first min(M,N) columns of U
!>          (the left singular vectors, stored columnwise);
!>          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1;
!>          if JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
!> 
[out]VT
!>          VT is COMPLEX*16 array, dimension (LDVT,N)
!>          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
!>          N-by-N unitary matrix V**H;
!>          if JOBZ = 'S', VT contains the first min(M,N) rows of
!>          V**H (the right singular vectors, stored rowwise);
!>          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1;
!>          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
!>          if JOBZ = 'S', LDVT >= min(M,N).
!> 
[out]WORK
!>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK. LWORK >= 1.
!>          If LWORK = -1, a workspace query is assumed.  The optimal
!>          size for the WORK array is calculated and stored in WORK(1),
!>          and no other work except argument checking is performed.
!>
!>          Let mx = max(M,N) and mn = min(M,N).
!>          If JOBZ = 'N', LWORK >= 2*mn + mx.
!>          If JOBZ = 'O', LWORK >= 2*mn*mn + 2*mn + mx.
!>          If JOBZ = 'S', LWORK >=   mn*mn + 3*mn.
!>          If JOBZ = 'A', LWORK >=   mn*mn + 2*mn + mx.
!>          These are not tight minimums in all cases; see comments inside code.
!>          For good performance, LWORK should generally be larger;
!>          a query is recommended.
!> 
[out]RWORK
!>          RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK))
!>          Let mx = max(M,N) and mn = min(M,N).
!>          If JOBZ = 'N',    LRWORK >= 5*mn (LAPACK <= 3.6 needs 7*mn);
!>          else if mx >> mn, LRWORK >= 5*mn*mn + 5*mn;
!>          else              LRWORK >= max( 5*mn*mn + 5*mn,
!>                                           2*mx*mn + 2*mn*mn + mn ).
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (8*min(M,N))
!> 
[out]INFO
!>          INFO is INTEGER
!>          <  0:  if INFO = -i, the i-th argument had an illegal value.
!>          = -4:  if A had a NAN entry.
!>          >  0:  The updating process of DBDSDC did not converge.
!>          =  0:  successful exit.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA

Definition at line 225 of file zgesdd.f.

227 implicit none
228*
229* -- LAPACK driver routine --
230* -- LAPACK is a software package provided by Univ. of Tennessee, --
231* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
232*
233* .. Scalar Arguments ..
234 CHARACTER JOBZ
235 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
236* ..
237* .. Array Arguments ..
238 INTEGER IWORK( * )
239 DOUBLE PRECISION RWORK( * ), S( * )
240 COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
241 $ WORK( * )
242* ..
243*
244* =====================================================================
245*
246* .. Parameters ..
247 COMPLEX*16 CZERO, CONE
248 parameter( czero = ( 0.0d+0, 0.0d+0 ),
249 $ cone = ( 1.0d+0, 0.0d+0 ) )
250 DOUBLE PRECISION ZERO, ONE
251 parameter( zero = 0.0d+0, one = 1.0d+0 )
252* ..
253* .. Local Scalars ..
254 LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
255 INTEGER BLK, CHUNK, I, IE, IERR, IL, IR, IRU, IRVT,
256 $ ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
257 $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
258 $ MNTHR1, MNTHR2, NRWORK, NWORK, WRKBL
259 INTEGER LWORK_ZGEBRD_MN, LWORK_ZGEBRD_MM,
260 $ LWORK_ZGEBRD_NN, LWORK_ZGELQF_MN,
261 $ LWORK_ZGEQRF_MN,
262 $ LWORK_ZUNGBR_P_MN, LWORK_ZUNGBR_P_NN,
263 $ LWORK_ZUNGBR_Q_MN, LWORK_ZUNGBR_Q_MM,
264 $ LWORK_ZUNGLQ_MN, LWORK_ZUNGLQ_NN,
265 $ LWORK_ZUNGQR_MM, LWORK_ZUNGQR_MN,
266 $ LWORK_ZUNMBR_PRC_MM, LWORK_ZUNMBR_QLN_MM,
267 $ LWORK_ZUNMBR_PRC_MN, LWORK_ZUNMBR_QLN_MN,
268 $ LWORK_ZUNMBR_PRC_NN, LWORK_ZUNMBR_QLN_NN
269 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
270* ..
271* .. Local Arrays ..
272 INTEGER IDUM( 1 )
273 DOUBLE PRECISION DUM( 1 )
274 COMPLEX*16 CDUM( 1 )
275* ..
276* .. External Subroutines ..
277 EXTERNAL dbdsdc, dlascl, xerbla, zgebrd, zgelqf, zgemm,
280* ..
281* .. External Functions ..
282 LOGICAL LSAME, DISNAN
283 DOUBLE PRECISION DLAMCH, ZLANGE, DROUNDUP_LWORK
284 EXTERNAL lsame, dlamch, zlange, disnan,
286* ..
287* .. Intrinsic Functions ..
288 INTRINSIC int, max, min, sqrt
289* ..
290* .. Executable Statements ..
291*
292* Test the input arguments
293*
294 info = 0
295 minmn = min( m, n )
296 mnthr1 = int( minmn*17.0d0 / 9.0d0 )
297 mnthr2 = int( minmn*5.0d0 / 3.0d0 )
298 wntqa = lsame( jobz, 'A' )
299 wntqs = lsame( jobz, 'S' )
300 wntqas = wntqa .OR. wntqs
301 wntqo = lsame( jobz, 'O' )
302 wntqn = lsame( jobz, 'N' )
303 lquery = ( lwork.EQ.-1 )
304 minwrk = 1
305 maxwrk = 1
306*
307 IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
308 info = -1
309 ELSE IF( m.LT.0 ) THEN
310 info = -2
311 ELSE IF( n.LT.0 ) THEN
312 info = -3
313 ELSE IF( lda.LT.max( 1, m ) ) THEN
314 info = -5
315 ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
316 $ ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
317 info = -8
318 ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
319 $ ( wntqs .AND. ldvt.LT.minmn ) .OR.
320 $ ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
321 info = -10
322 END IF
323*
324* Compute workspace
325* Note: Comments in the code beginning "Workspace:" describe the
326* minimal amount of workspace allocated at that point in the code,
327* as well as the preferred amount for good performance.
328* CWorkspace refers to complex workspace, and RWorkspace to
329* real workspace. NB refers to the optimal block size for the
330* immediately following subroutine, as returned by ILAENV.)
331*
332 IF( info.EQ.0 ) THEN
333 minwrk = 1
334 maxwrk = 1
335 IF( m.GE.n .AND. minmn.GT.0 ) THEN
336*
337* There is no complex work space needed for bidiagonal SVD
338* The real work space needed for bidiagonal SVD (dbdsdc) is
339* BDSPAC = 3*N*N + 4*N for singular values and vectors;
340* BDSPAC = 4*N for singular values only;
341* not including e, RU, and RVT matrices.
342*
343* Compute space preferred for each routine
344 CALL zgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),
345 $ cdum(1), cdum(1), -1, ierr )
346 lwork_zgebrd_mn = int( cdum(1) )
347*
348 CALL zgebrd( n, n, cdum(1), n, dum(1), dum(1), cdum(1),
349 $ cdum(1), cdum(1), -1, ierr )
350 lwork_zgebrd_nn = int( cdum(1) )
351*
352 CALL zgeqrf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr )
353 lwork_zgeqrf_mn = int( cdum(1) )
354*
355 CALL zungbr( 'P', n, n, n, cdum(1), n, cdum(1), cdum(1),
356 $ -1, ierr )
357 lwork_zungbr_p_nn = int( cdum(1) )
358*
359 CALL zungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),
360 $ -1, ierr )
361 lwork_zungbr_q_mm = int( cdum(1) )
362*
363 CALL zungbr( 'Q', m, n, n, cdum(1), m, cdum(1), cdum(1),
364 $ -1, ierr )
365 lwork_zungbr_q_mn = int( cdum(1) )
366*
367 CALL zungqr( m, m, n, cdum(1), m, cdum(1), cdum(1),
368 $ -1, ierr )
369 lwork_zungqr_mm = int( cdum(1) )
370*
371 CALL zungqr( m, n, n, cdum(1), m, cdum(1), cdum(1),
372 $ -1, ierr )
373 lwork_zungqr_mn = int( cdum(1) )
374*
375 CALL zunmbr( 'P', 'R', 'C', n, n, n, cdum(1), n, cdum(1),
376 $ cdum(1), n, cdum(1), -1, ierr )
377 lwork_zunmbr_prc_nn = int( cdum(1) )
378*
379 CALL zunmbr( 'Q', 'L', 'N', m, m, n, cdum(1), m, cdum(1),
380 $ cdum(1), m, cdum(1), -1, ierr )
381 lwork_zunmbr_qln_mm = int( cdum(1) )
382*
383 CALL zunmbr( 'Q', 'L', 'N', m, n, n, cdum(1), m, cdum(1),
384 $ cdum(1), m, cdum(1), -1, ierr )
385 lwork_zunmbr_qln_mn = int( cdum(1) )
386*
387 CALL zunmbr( 'Q', 'L', 'N', n, n, n, cdum(1), n, cdum(1),
388 $ cdum(1), n, cdum(1), -1, ierr )
389 lwork_zunmbr_qln_nn = int( cdum(1) )
390*
391 IF( m.GE.mnthr1 ) THEN
392 IF( wntqn ) THEN
393*
394* Path 1 (M >> N, JOBZ='N')
395*
396 maxwrk = n + lwork_zgeqrf_mn
397 maxwrk = max( maxwrk, 2*n + lwork_zgebrd_nn )
398 minwrk = 3*n
399 ELSE IF( wntqo ) THEN
400*
401* Path 2 (M >> N, JOBZ='O')
402*
403 wrkbl = n + lwork_zgeqrf_mn
404 wrkbl = max( wrkbl, n + lwork_zungqr_mn )
405 wrkbl = max( wrkbl, 2*n + lwork_zgebrd_nn )
406 wrkbl = max( wrkbl, 2*n + lwork_zunmbr_qln_nn )
407 wrkbl = max( wrkbl, 2*n + lwork_zunmbr_prc_nn )
408 maxwrk = m*n + n*n + wrkbl
409 minwrk = 2*n*n + 3*n
410 ELSE IF( wntqs ) THEN
411*
412* Path 3 (M >> N, JOBZ='S')
413*
414 wrkbl = n + lwork_zgeqrf_mn
415 wrkbl = max( wrkbl, n + lwork_zungqr_mn )
416 wrkbl = max( wrkbl, 2*n + lwork_zgebrd_nn )
417 wrkbl = max( wrkbl, 2*n + lwork_zunmbr_qln_nn )
418 wrkbl = max( wrkbl, 2*n + lwork_zunmbr_prc_nn )
419 maxwrk = n*n + wrkbl
420 minwrk = n*n + 3*n
421 ELSE IF( wntqa ) THEN
422*
423* Path 4 (M >> N, JOBZ='A')
424*
425 wrkbl = n + lwork_zgeqrf_mn
426 wrkbl = max( wrkbl, n + lwork_zungqr_mm )
427 wrkbl = max( wrkbl, 2*n + lwork_zgebrd_nn )
428 wrkbl = max( wrkbl, 2*n + lwork_zunmbr_qln_nn )
429 wrkbl = max( wrkbl, 2*n + lwork_zunmbr_prc_nn )
430 maxwrk = n*n + wrkbl
431 minwrk = n*n + max( 3*n, n + m )
432 END IF
433 ELSE IF( m.GE.mnthr2 ) THEN
434*
435* Path 5 (M >> N, but not as much as MNTHR1)
436*
437 maxwrk = 2*n + lwork_zgebrd_mn
438 minwrk = 2*n + m
439 IF( wntqo ) THEN
440* Path 5o (M >> N, JOBZ='O')
441 maxwrk = max( maxwrk, 2*n + lwork_zungbr_p_nn )
442 maxwrk = max( maxwrk, 2*n + lwork_zungbr_q_mn )
443 maxwrk = maxwrk + m*n
444 minwrk = minwrk + n*n
445 ELSE IF( wntqs ) THEN
446* Path 5s (M >> N, JOBZ='S')
447 maxwrk = max( maxwrk, 2*n + lwork_zungbr_p_nn )
448 maxwrk = max( maxwrk, 2*n + lwork_zungbr_q_mn )
449 ELSE IF( wntqa ) THEN
450* Path 5a (M >> N, JOBZ='A')
451 maxwrk = max( maxwrk, 2*n + lwork_zungbr_p_nn )
452 maxwrk = max( maxwrk, 2*n + lwork_zungbr_q_mm )
453 END IF
454 ELSE
455*
456* Path 6 (M >= N, but not much larger)
457*
458 maxwrk = 2*n + lwork_zgebrd_mn
459 minwrk = 2*n + m
460 IF( wntqo ) THEN
461* Path 6o (M >= N, JOBZ='O')
462 maxwrk = max( maxwrk, 2*n + lwork_zunmbr_prc_nn )
463 maxwrk = max( maxwrk, 2*n + lwork_zunmbr_qln_mn )
464 maxwrk = maxwrk + m*n
465 minwrk = minwrk + n*n
466 ELSE IF( wntqs ) THEN
467* Path 6s (M >= N, JOBZ='S')
468 maxwrk = max( maxwrk, 2*n + lwork_zunmbr_qln_mn )
469 maxwrk = max( maxwrk, 2*n + lwork_zunmbr_prc_nn )
470 ELSE IF( wntqa ) THEN
471* Path 6a (M >= N, JOBZ='A')
472 maxwrk = max( maxwrk, 2*n + lwork_zunmbr_qln_mm )
473 maxwrk = max( maxwrk, 2*n + lwork_zunmbr_prc_nn )
474 END IF
475 END IF
476 ELSE IF( minmn.GT.0 ) THEN
477*
478* There is no complex work space needed for bidiagonal SVD
479* The real work space needed for bidiagonal SVD (dbdsdc) is
480* BDSPAC = 3*M*M + 4*M for singular values and vectors;
481* BDSPAC = 4*M for singular values only;
482* not including e, RU, and RVT matrices.
483*
484* Compute space preferred for each routine
485 CALL zgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),
486 $ cdum(1), cdum(1), -1, ierr )
487 lwork_zgebrd_mn = int( cdum(1) )
488*
489 CALL zgebrd( m, m, cdum(1), m, dum(1), dum(1), cdum(1),
490 $ cdum(1), cdum(1), -1, ierr )
491 lwork_zgebrd_mm = int( cdum(1) )
492*
493 CALL zgelqf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr )
494 lwork_zgelqf_mn = int( cdum(1) )
495*
496 CALL zungbr( 'P', m, n, m, cdum(1), m, cdum(1), cdum(1),
497 $ -1, ierr )
498 lwork_zungbr_p_mn = int( cdum(1) )
499*
500 CALL zungbr( 'P', n, n, m, cdum(1), n, cdum(1), cdum(1),
501 $ -1, ierr )
502 lwork_zungbr_p_nn = int( cdum(1) )
503*
504 CALL zungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),
505 $ -1, ierr )
506 lwork_zungbr_q_mm = int( cdum(1) )
507*
508 CALL zunglq( m, n, m, cdum(1), m, cdum(1), cdum(1),
509 $ -1, ierr )
510 lwork_zunglq_mn = int( cdum(1) )
511*
512 CALL zunglq( n, n, m, cdum(1), n, cdum(1), cdum(1),
513 $ -1, ierr )
514 lwork_zunglq_nn = int( cdum(1) )
515*
516 CALL zunmbr( 'P', 'R', 'C', m, m, m, cdum(1), m, cdum(1),
517 $ cdum(1), m, cdum(1), -1, ierr )
518 lwork_zunmbr_prc_mm = int( cdum(1) )
519*
520 CALL zunmbr( 'P', 'R', 'C', m, n, m, cdum(1), m, cdum(1),
521 $ cdum(1), m, cdum(1), -1, ierr )
522 lwork_zunmbr_prc_mn = int( cdum(1) )
523*
524 CALL zunmbr( 'P', 'R', 'C', n, n, m, cdum(1), n, cdum(1),
525 $ cdum(1), n, cdum(1), -1, ierr )
526 lwork_zunmbr_prc_nn = int( cdum(1) )
527*
528 CALL zunmbr( 'Q', 'L', 'N', m, m, m, cdum(1), m, cdum(1),
529 $ cdum(1), m, cdum(1), -1, ierr )
530 lwork_zunmbr_qln_mm = int( cdum(1) )
531*
532 IF( n.GE.mnthr1 ) THEN
533 IF( wntqn ) THEN
534*
535* Path 1t (N >> M, JOBZ='N')
536*
537 maxwrk = m + lwork_zgelqf_mn
538 maxwrk = max( maxwrk, 2*m + lwork_zgebrd_mm )
539 minwrk = 3*m
540 ELSE IF( wntqo ) THEN
541*
542* Path 2t (N >> M, JOBZ='O')
543*
544 wrkbl = m + lwork_zgelqf_mn
545 wrkbl = max( wrkbl, m + lwork_zunglq_mn )
546 wrkbl = max( wrkbl, 2*m + lwork_zgebrd_mm )
547 wrkbl = max( wrkbl, 2*m + lwork_zunmbr_qln_mm )
548 wrkbl = max( wrkbl, 2*m + lwork_zunmbr_prc_mm )
549 maxwrk = m*n + m*m + wrkbl
550 minwrk = 2*m*m + 3*m
551 ELSE IF( wntqs ) THEN
552*
553* Path 3t (N >> M, JOBZ='S')
554*
555 wrkbl = m + lwork_zgelqf_mn
556 wrkbl = max( wrkbl, m + lwork_zunglq_mn )
557 wrkbl = max( wrkbl, 2*m + lwork_zgebrd_mm )
558 wrkbl = max( wrkbl, 2*m + lwork_zunmbr_qln_mm )
559 wrkbl = max( wrkbl, 2*m + lwork_zunmbr_prc_mm )
560 maxwrk = m*m + wrkbl
561 minwrk = m*m + 3*m
562 ELSE IF( wntqa ) THEN
563*
564* Path 4t (N >> M, JOBZ='A')
565*
566 wrkbl = m + lwork_zgelqf_mn
567 wrkbl = max( wrkbl, m + lwork_zunglq_nn )
568 wrkbl = max( wrkbl, 2*m + lwork_zgebrd_mm )
569 wrkbl = max( wrkbl, 2*m + lwork_zunmbr_qln_mm )
570 wrkbl = max( wrkbl, 2*m + lwork_zunmbr_prc_mm )
571 maxwrk = m*m + wrkbl
572 minwrk = m*m + max( 3*m, m + n )
573 END IF
574 ELSE IF( n.GE.mnthr2 ) THEN
575*
576* Path 5t (N >> M, but not as much as MNTHR1)
577*
578 maxwrk = 2*m + lwork_zgebrd_mn
579 minwrk = 2*m + n
580 IF( wntqo ) THEN
581* Path 5to (N >> M, JOBZ='O')
582 maxwrk = max( maxwrk, 2*m + lwork_zungbr_q_mm )
583 maxwrk = max( maxwrk, 2*m + lwork_zungbr_p_mn )
584 maxwrk = maxwrk + m*n
585 minwrk = minwrk + m*m
586 ELSE IF( wntqs ) THEN
587* Path 5ts (N >> M, JOBZ='S')
588 maxwrk = max( maxwrk, 2*m + lwork_zungbr_q_mm )
589 maxwrk = max( maxwrk, 2*m + lwork_zungbr_p_mn )
590 ELSE IF( wntqa ) THEN
591* Path 5ta (N >> M, JOBZ='A')
592 maxwrk = max( maxwrk, 2*m + lwork_zungbr_q_mm )
593 maxwrk = max( maxwrk, 2*m + lwork_zungbr_p_nn )
594 END IF
595 ELSE
596*
597* Path 6t (N > M, but not much larger)
598*
599 maxwrk = 2*m + lwork_zgebrd_mn
600 minwrk = 2*m + n
601 IF( wntqo ) THEN
602* Path 6to (N > M, JOBZ='O')
603 maxwrk = max( maxwrk, 2*m + lwork_zunmbr_qln_mm )
604 maxwrk = max( maxwrk, 2*m + lwork_zunmbr_prc_mn )
605 maxwrk = maxwrk + m*n
606 minwrk = minwrk + m*m
607 ELSE IF( wntqs ) THEN
608* Path 6ts (N > M, JOBZ='S')
609 maxwrk = max( maxwrk, 2*m + lwork_zunmbr_qln_mm )
610 maxwrk = max( maxwrk, 2*m + lwork_zunmbr_prc_mn )
611 ELSE IF( wntqa ) THEN
612* Path 6ta (N > M, JOBZ='A')
613 maxwrk = max( maxwrk, 2*m + lwork_zunmbr_qln_mm )
614 maxwrk = max( maxwrk, 2*m + lwork_zunmbr_prc_nn )
615 END IF
616 END IF
617 END IF
618 maxwrk = max( maxwrk, minwrk )
619 END IF
620 IF( info.EQ.0 ) THEN
621 work( 1 ) = droundup_lwork( maxwrk )
622 IF( lwork.LT.minwrk .AND. .NOT. lquery ) THEN
623 info = -12
624 END IF
625 END IF
626*
627 IF( info.NE.0 ) THEN
628 CALL xerbla( 'ZGESDD', -info )
629 RETURN
630 ELSE IF( lquery ) THEN
631 RETURN
632 END IF
633*
634* Quick return if possible
635*
636 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
637 RETURN
638 END IF
639*
640* Get machine constants
641*
642 eps = dlamch( 'P' )
643 smlnum = sqrt( dlamch( 'S' ) ) / eps
644 bignum = one / smlnum
645*
646* Scale A if max element outside range [SMLNUM,BIGNUM]
647*
648 anrm = zlange( 'M', m, n, a, lda, dum )
649 IF( disnan( anrm ) ) THEN
650 info = -4
651 RETURN
652 END IF
653 iscl = 0
654 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
655 iscl = 1
656 CALL zlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
657 ELSE IF( anrm.GT.bignum ) THEN
658 iscl = 1
659 CALL zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
660 END IF
661*
662 IF( m.GE.n ) THEN
663*
664* A has at least as many rows as columns. If A has sufficiently
665* more rows than columns, first reduce using the QR
666* decomposition (if sufficient workspace available)
667*
668 IF( m.GE.mnthr1 ) THEN
669*
670 IF( wntqn ) THEN
671*
672* Path 1 (M >> N, JOBZ='N')
673* No singular vectors to be computed
674*
675 itau = 1
676 nwork = itau + n
677*
678* Compute A=Q*R
679* CWorkspace: need N [tau] + N [work]
680* CWorkspace: prefer N [tau] + N*NB [work]
681* RWorkspace: need 0
682*
683 CALL zgeqrf( m, n, a, lda, work( itau ), work( nwork ),
684 $ lwork-nwork+1, ierr )
685*
686* Zero out below R
687*
688 CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
689 $ lda )
690 ie = 1
691 itauq = 1
692 itaup = itauq + n
693 nwork = itaup + n
694*
695* Bidiagonalize R in A
696* CWorkspace: need 2*N [tauq, taup] + N [work]
697* CWorkspace: prefer 2*N [tauq, taup] + 2*N*NB [work]
698* RWorkspace: need N [e]
699*
700 CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
701 $ work( itaup ), work( nwork ), lwork-nwork+1,
702 $ ierr )
703 nrwork = ie + n
704*
705* Perform bidiagonal SVD, compute singular values only
706* CWorkspace: need 0
707* RWorkspace: need N [e] + BDSPAC
708*
709 CALL dbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,
710 $ dum, idum, rwork( nrwork ), iwork, info )
711*
712 ELSE IF( wntqo ) THEN
713*
714* Path 2 (M >> N, JOBZ='O')
715* N left singular vectors to be overwritten on A and
716* N right singular vectors to be computed in VT
717*
718 iu = 1
719*
720* WORK(IU) is N by N
721*
722 ldwrku = n
723 ir = iu + ldwrku*n
724 IF( lwork .GE. m*n + n*n + 3*n ) THEN
725*
726* WORK(IR) is M by N
727*
728 ldwrkr = m
729 ELSE
730 ldwrkr = ( lwork - n*n - 3*n ) / n
731 END IF
732 itau = ir + ldwrkr*n
733 nwork = itau + n
734*
735* Compute A=Q*R
736* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work]
737* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work]
738* RWorkspace: need 0
739*
740 CALL zgeqrf( m, n, a, lda, work( itau ), work( nwork ),
741 $ lwork-nwork+1, ierr )
742*
743* Copy R to WORK( IR ), zeroing out below it
744*
745 CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
746 CALL zlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),
747 $ ldwrkr )
748*
749* Generate Q in A
750* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work]
751* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work]
752* RWorkspace: need 0
753*
754 CALL zungqr( m, n, n, a, lda, work( itau ),
755 $ work( nwork ), lwork-nwork+1, ierr )
756 ie = 1
757 itauq = itau
758 itaup = itauq + n
759 nwork = itaup + n
760*
761* Bidiagonalize R in WORK(IR)
762* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work]
763* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + 2*N*NB [work]
764* RWorkspace: need N [e]
765*
766 CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
767 $ work( itauq ), work( itaup ), work( nwork ),
768 $ lwork-nwork+1, ierr )
769*
770* Perform bidiagonal SVD, computing left singular vectors
771* of R in WORK(IRU) and computing right singular vectors
772* of R in WORK(IRVT)
773* CWorkspace: need 0
774* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
775*
776 iru = ie + n
777 irvt = iru + n*n
778 nrwork = irvt + n*n
779 CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
780 $ n, rwork( irvt ), n, dum, idum,
781 $ rwork( nrwork ), iwork, info )
782*
783* Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
784* Overwrite WORK(IU) by the left singular vectors of R
785* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work]
786* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work]
787* RWorkspace: need 0
788*
789 CALL zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),
790 $ ldwrku )
791 CALL zunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
792 $ work( itauq ), work( iu ), ldwrku,
793 $ work( nwork ), lwork-nwork+1, ierr )
794*
795* Copy real matrix RWORK(IRVT) to complex matrix VT
796* Overwrite VT by the right singular vectors of R
797* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work]
798* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work]
799* RWorkspace: need 0
800*
801 CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
802 CALL zunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,
803 $ work( itaup ), vt, ldvt, work( nwork ),
804 $ lwork-nwork+1, ierr )
805*
806* Multiply Q in A by left singular vectors of R in
807* WORK(IU), storing result in WORK(IR) and copying to A
808* CWorkspace: need N*N [U] + N*N [R]
809* CWorkspace: prefer N*N [U] + M*N [R]
810* RWorkspace: need 0
811*
812 DO 10 i = 1, m, ldwrkr
813 chunk = min( m-i+1, ldwrkr )
814 CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
815 $ lda, work( iu ), ldwrku, czero,
816 $ work( ir ), ldwrkr )
817 CALL zlacpy( 'F', chunk, n, work( ir ), ldwrkr,
818 $ a( i, 1 ), lda )
819 10 CONTINUE
820*
821 ELSE IF( wntqs ) THEN
822*
823* Path 3 (M >> N, JOBZ='S')
824* N left singular vectors to be computed in U and
825* N right singular vectors to be computed in VT
826*
827 ir = 1
828*
829* WORK(IR) is N by N
830*
831 ldwrkr = n
832 itau = ir + ldwrkr*n
833 nwork = itau + n
834*
835* Compute A=Q*R
836* CWorkspace: need N*N [R] + N [tau] + N [work]
837* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work]
838* RWorkspace: need 0
839*
840 CALL zgeqrf( m, n, a, lda, work( itau ), work( nwork ),
841 $ lwork-nwork+1, ierr )
842*
843* Copy R to WORK(IR), zeroing out below it
844*
845 CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
846 CALL zlaset( 'L', n-1, n-1, czero, czero, work( ir+1 ),
847 $ ldwrkr )
848*
849* Generate Q in A
850* CWorkspace: need N*N [R] + N [tau] + N [work]
851* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work]
852* RWorkspace: need 0
853*
854 CALL zungqr( m, n, n, a, lda, work( itau ),
855 $ work( nwork ), lwork-nwork+1, ierr )
856 ie = 1
857 itauq = itau
858 itaup = itauq + n
859 nwork = itaup + n
860*
861* Bidiagonalize R in WORK(IR)
862* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work]
863* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + 2*N*NB [work]
864* RWorkspace: need N [e]
865*
866 CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
867 $ work( itauq ), work( itaup ), work( nwork ),
868 $ lwork-nwork+1, ierr )
869*
870* Perform bidiagonal SVD, computing left singular vectors
871* of bidiagonal matrix in RWORK(IRU) and computing right
872* singular vectors of bidiagonal matrix in RWORK(IRVT)
873* CWorkspace: need 0
874* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
875*
876 iru = ie + n
877 irvt = iru + n*n
878 nrwork = irvt + n*n
879 CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
880 $ n, rwork( irvt ), n, dum, idum,
881 $ rwork( nrwork ), iwork, info )
882*
883* Copy real matrix RWORK(IRU) to complex matrix U
884* Overwrite U by left singular vectors of R
885* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work]
886* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work]
887* RWorkspace: need 0
888*
889 CALL zlacp2( 'F', n, n, rwork( iru ), n, u, ldu )
890 CALL zunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
891 $ work( itauq ), u, ldu, work( nwork ),
892 $ lwork-nwork+1, ierr )
893*
894* Copy real matrix RWORK(IRVT) to complex matrix VT
895* Overwrite VT by right singular vectors of R
896* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work]
897* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work]
898* RWorkspace: need 0
899*
900 CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
901 CALL zunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,
902 $ work( itaup ), vt, ldvt, work( nwork ),
903 $ lwork-nwork+1, ierr )
904*
905* Multiply Q in A by left singular vectors of R in
906* WORK(IR), storing result in U
907* CWorkspace: need N*N [R]
908* RWorkspace: need 0
909*
910 CALL zlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
911 CALL zgemm( 'N', 'N', m, n, n, cone, a, lda, work( ir ),
912 $ ldwrkr, czero, u, ldu )
913*
914 ELSE IF( wntqa ) THEN
915*
916* Path 4 (M >> N, JOBZ='A')
917* M left singular vectors to be computed in U and
918* N right singular vectors to be computed in VT
919*
920 iu = 1
921*
922* WORK(IU) is N by N
923*
924 ldwrku = n
925 itau = iu + ldwrku*n
926 nwork = itau + n
927*
928* Compute A=Q*R, copying result to U
929* CWorkspace: need N*N [U] + N [tau] + N [work]
930* CWorkspace: prefer N*N [U] + N [tau] + N*NB [work]
931* RWorkspace: need 0
932*
933 CALL zgeqrf( m, n, a, lda, work( itau ), work( nwork ),
934 $ lwork-nwork+1, ierr )
935 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
936*
937* Generate Q in U
938* CWorkspace: need N*N [U] + N [tau] + M [work]
939* CWorkspace: prefer N*N [U] + N [tau] + M*NB [work]
940* RWorkspace: need 0
941*
942 CALL zungqr( m, m, n, u, ldu, work( itau ),
943 $ work( nwork ), lwork-nwork+1, ierr )
944*
945* Produce R in A, zeroing out below it
946*
947 CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
948 $ lda )
949 ie = 1
950 itauq = itau
951 itaup = itauq + n
952 nwork = itaup + n
953*
954* Bidiagonalize R in A
955* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work]
956* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + 2*N*NB [work]
957* RWorkspace: need N [e]
958*
959 CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
960 $ work( itaup ), work( nwork ), lwork-nwork+1,
961 $ ierr )
962 iru = ie + n
963 irvt = iru + n*n
964 nrwork = irvt + n*n
965*
966* Perform bidiagonal SVD, computing left singular vectors
967* of bidiagonal matrix in RWORK(IRU) and computing right
968* singular vectors of bidiagonal matrix in RWORK(IRVT)
969* CWorkspace: need 0
970* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
971*
972 CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
973 $ n, rwork( irvt ), n, dum, idum,
974 $ rwork( nrwork ), iwork, info )
975*
976* Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
977* Overwrite WORK(IU) by left singular vectors of R
978* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work]
979* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work]
980* RWorkspace: need 0
981*
982 CALL zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),
983 $ ldwrku )
984 CALL zunmbr( 'Q', 'L', 'N', n, n, n, a, lda,
985 $ work( itauq ), work( iu ), ldwrku,
986 $ work( nwork ), lwork-nwork+1, ierr )
987*
988* Copy real matrix RWORK(IRVT) to complex matrix VT
989* Overwrite VT by right singular vectors of R
990* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work]
991* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work]
992* RWorkspace: need 0
993*
994 CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
995 CALL zunmbr( 'P', 'R', 'C', n, n, n, a, lda,
996 $ work( itaup ), vt, ldvt, work( nwork ),
997 $ lwork-nwork+1, ierr )
998*
999* Multiply Q in U by left singular vectors of R in
1000* WORK(IU), storing result in A
1001* CWorkspace: need N*N [U]
1002* RWorkspace: need 0
1003*
1004 CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu, work( iu ),
1005 $ ldwrku, czero, a, lda )
1006*
1007* Copy left singular vectors of A from A to U
1008*
1009 CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1010*
1011 END IF
1012*
1013 ELSE IF( m.GE.mnthr2 ) THEN
1014*
1015* MNTHR2 <= M < MNTHR1
1016*
1017* Path 5 (M >> N, but not as much as MNTHR1)
1018* Reduce to bidiagonal form without QR decomposition, use
1019* ZUNGBR and matrix multiplication to compute singular vectors
1020*
1021 ie = 1
1022 nrwork = ie + n
1023 itauq = 1
1024 itaup = itauq + n
1025 nwork = itaup + n
1026*
1027* Bidiagonalize A
1028* CWorkspace: need 2*N [tauq, taup] + M [work]
1029* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work]
1030* RWorkspace: need N [e]
1031*
1032 CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
1033 $ work( itaup ), work( nwork ), lwork-nwork+1,
1034 $ ierr )
1035 IF( wntqn ) THEN
1036*
1037* Path 5n (M >> N, JOBZ='N')
1038* Compute singular values only
1039* CWorkspace: need 0
1040* RWorkspace: need N [e] + BDSPAC
1041*
1042 CALL dbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1,dum,1,
1043 $ dum, idum, rwork( nrwork ), iwork, info )
1044 ELSE IF( wntqo ) THEN
1045 iu = nwork
1046 iru = nrwork
1047 irvt = iru + n*n
1048 nrwork = irvt + n*n
1049*
1050* Path 5o (M >> N, JOBZ='O')
1051* Copy A to VT, generate P**H
1052* CWorkspace: need 2*N [tauq, taup] + N [work]
1053* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1054* RWorkspace: need 0
1055*
1056 CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1057 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1058 $ work( nwork ), lwork-nwork+1, ierr )
1059*
1060* Generate Q in A
1061* CWorkspace: need 2*N [tauq, taup] + N [work]
1062* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1063* RWorkspace: need 0
1064*
1065 CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
1066 $ work( nwork ), lwork-nwork+1, ierr )
1067*
1068 IF( lwork .GE. m*n + 3*n ) THEN
1069*
1070* WORK( IU ) is M by N
1071*
1072 ldwrku = m
1073 ELSE
1074*
1075* WORK(IU) is LDWRKU by N
1076*
1077 ldwrku = ( lwork - 3*n ) / n
1078 END IF
1079 nwork = iu + ldwrku*n
1080*
1081* Perform bidiagonal SVD, computing left singular vectors
1082* of bidiagonal matrix in RWORK(IRU) and computing right
1083* singular vectors of bidiagonal matrix in RWORK(IRVT)
1084* CWorkspace: need 0
1085* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1086*
1087 CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1088 $ n, rwork( irvt ), n, dum, idum,
1089 $ rwork( nrwork ), iwork, info )
1090*
1091* Multiply real matrix RWORK(IRVT) by P**H in VT,
1092* storing the result in WORK(IU), copying to VT
1093* CWorkspace: need 2*N [tauq, taup] + N*N [U]
1094* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork]
1095*
1096 CALL zlarcm( n, n, rwork( irvt ), n, vt, ldvt,
1097 $ work( iu ), ldwrku, rwork( nrwork ) )
1098 CALL zlacpy( 'F', n, n, work( iu ), ldwrku, vt, ldvt )
1099*
1100* Multiply Q in A by real matrix RWORK(IRU), storing the
1101* result in WORK(IU), copying to A
1102* CWorkspace: need 2*N [tauq, taup] + N*N [U]
1103* CWorkspace: prefer 2*N [tauq, taup] + M*N [U]
1104* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork]
1105* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here
1106*
1107 nrwork = irvt
1108 DO 20 i = 1, m, ldwrku
1109 chunk = min( m-i+1, ldwrku )
1110 CALL zlacrm( chunk, n, a( i, 1 ), lda, rwork( iru ),
1111 $ n, work( iu ), ldwrku, rwork( nrwork ) )
1112 CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
1113 $ a( i, 1 ), lda )
1114 20 CONTINUE
1115*
1116 ELSE IF( wntqs ) THEN
1117*
1118* Path 5s (M >> N, JOBZ='S')
1119* Copy A to VT, generate P**H
1120* CWorkspace: need 2*N [tauq, taup] + N [work]
1121* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1122* RWorkspace: need 0
1123*
1124 CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1125 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1126 $ work( nwork ), lwork-nwork+1, ierr )
1127*
1128* Copy A to U, generate Q
1129* CWorkspace: need 2*N [tauq, taup] + N [work]
1130* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1131* RWorkspace: need 0
1132*
1133 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1134 CALL zungbr( 'Q', m, n, n, u, ldu, work( itauq ),
1135 $ work( nwork ), lwork-nwork+1, ierr )
1136*
1137* Perform bidiagonal SVD, computing left singular vectors
1138* of bidiagonal matrix in RWORK(IRU) and computing right
1139* singular vectors of bidiagonal matrix in RWORK(IRVT)
1140* CWorkspace: need 0
1141* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1142*
1143 iru = nrwork
1144 irvt = iru + n*n
1145 nrwork = irvt + n*n
1146 CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1147 $ n, rwork( irvt ), n, dum, idum,
1148 $ rwork( nrwork ), iwork, info )
1149*
1150* Multiply real matrix RWORK(IRVT) by P**H in VT,
1151* storing the result in A, copying to VT
1152* CWorkspace: need 0
1153* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork]
1154*
1155 CALL zlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,
1156 $ rwork( nrwork ) )
1157 CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
1158*
1159* Multiply Q in U by real matrix RWORK(IRU), storing the
1160* result in A, copying to U
1161* CWorkspace: need 0
1162* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here
1163*
1164 nrwork = irvt
1165 CALL zlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,
1166 $ rwork( nrwork ) )
1167 CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1168 ELSE
1169*
1170* Path 5a (M >> N, JOBZ='A')
1171* Copy A to VT, generate P**H
1172* CWorkspace: need 2*N [tauq, taup] + N [work]
1173* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1174* RWorkspace: need 0
1175*
1176 CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1177 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1178 $ work( nwork ), lwork-nwork+1, ierr )
1179*
1180* Copy A to U, generate Q
1181* CWorkspace: need 2*N [tauq, taup] + M [work]
1182* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work]
1183* RWorkspace: need 0
1184*
1185 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1186 CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1187 $ work( nwork ), lwork-nwork+1, ierr )
1188*
1189* Perform bidiagonal SVD, computing left singular vectors
1190* of bidiagonal matrix in RWORK(IRU) and computing right
1191* singular vectors of bidiagonal matrix in RWORK(IRVT)
1192* CWorkspace: need 0
1193* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1194*
1195 iru = nrwork
1196 irvt = iru + n*n
1197 nrwork = irvt + n*n
1198 CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1199 $ n, rwork( irvt ), n, dum, idum,
1200 $ rwork( nrwork ), iwork, info )
1201*
1202* Multiply real matrix RWORK(IRVT) by P**H in VT,
1203* storing the result in A, copying to VT
1204* CWorkspace: need 0
1205* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork]
1206*
1207 CALL zlarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,
1208 $ rwork( nrwork ) )
1209 CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
1210*
1211* Multiply Q in U by real matrix RWORK(IRU), storing the
1212* result in A, copying to U
1213* CWorkspace: need 0
1214* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here
1215*
1216 nrwork = irvt
1217 CALL zlacrm( m, n, u, ldu, rwork( iru ), n, a, lda,
1218 $ rwork( nrwork ) )
1219 CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1220 END IF
1221*
1222 ELSE
1223*
1224* M .LT. MNTHR2
1225*
1226* Path 6 (M >= N, but not much larger)
1227* Reduce to bidiagonal form without QR decomposition
1228* Use ZUNMBR to compute singular vectors
1229*
1230 ie = 1
1231 nrwork = ie + n
1232 itauq = 1
1233 itaup = itauq + n
1234 nwork = itaup + n
1235*
1236* Bidiagonalize A
1237* CWorkspace: need 2*N [tauq, taup] + M [work]
1238* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work]
1239* RWorkspace: need N [e]
1240*
1241 CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
1242 $ work( itaup ), work( nwork ), lwork-nwork+1,
1243 $ ierr )
1244 IF( wntqn ) THEN
1245*
1246* Path 6n (M >= N, JOBZ='N')
1247* Compute singular values only
1248* CWorkspace: need 0
1249* RWorkspace: need N [e] + BDSPAC
1250*
1251 CALL dbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,
1252 $ dum, idum, rwork( nrwork ), iwork, info )
1253 ELSE IF( wntqo ) THEN
1254 iu = nwork
1255 iru = nrwork
1256 irvt = iru + n*n
1257 nrwork = irvt + n*n
1258 IF( lwork .GE. m*n + 3*n ) THEN
1259*
1260* WORK( IU ) is M by N
1261*
1262 ldwrku = m
1263 ELSE
1264*
1265* WORK( IU ) is LDWRKU by N
1266*
1267 ldwrku = ( lwork - 3*n ) / n
1268 END IF
1269 nwork = iu + ldwrku*n
1270*
1271* Path 6o (M >= N, JOBZ='O')
1272* Perform bidiagonal SVD, computing left singular vectors
1273* of bidiagonal matrix in RWORK(IRU) and computing right
1274* singular vectors of bidiagonal matrix in RWORK(IRVT)
1275* CWorkspace: need 0
1276* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1277*
1278 CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1279 $ n, rwork( irvt ), n, dum, idum,
1280 $ rwork( nrwork ), iwork, info )
1281*
1282* Copy real matrix RWORK(IRVT) to complex matrix VT
1283* Overwrite VT by right singular vectors of A
1284* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work]
1285* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work]
1286* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1287*
1288 CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
1289 CALL zunmbr( 'P', 'R', 'C', n, n, n, a, lda,
1290 $ work( itaup ), vt, ldvt, work( nwork ),
1291 $ lwork-nwork+1, ierr )
1292*
1293 IF( lwork .GE. m*n + 3*n ) THEN
1294*
1295* Path 6o-fast
1296* Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
1297* Overwrite WORK(IU) by left singular vectors of A, copying
1298* to A
1299* CWorkspace: need 2*N [tauq, taup] + M*N [U] + N [work]
1300* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] + N*NB [work]
1301* RWorkspace: need N [e] + N*N [RU]
1302*
1303 CALL zlaset( 'F', m, n, czero, czero, work( iu ),
1304 $ ldwrku )
1305 CALL zlacp2( 'F', n, n, rwork( iru ), n, work( iu ),
1306 $ ldwrku )
1307 CALL zunmbr( 'Q', 'L', 'N', m, n, n, a, lda,
1308 $ work( itauq ), work( iu ), ldwrku,
1309 $ work( nwork ), lwork-nwork+1, ierr )
1310 CALL zlacpy( 'F', m, n, work( iu ), ldwrku, a, lda )
1311 ELSE
1312*
1313* Path 6o-slow
1314* Generate Q in A
1315* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work]
1316* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work]
1317* RWorkspace: need 0
1318*
1319 CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
1320 $ work( nwork ), lwork-nwork+1, ierr )
1321*
1322* Multiply Q in A by real matrix RWORK(IRU), storing the
1323* result in WORK(IU), copying to A
1324* CWorkspace: need 2*N [tauq, taup] + N*N [U]
1325* CWorkspace: prefer 2*N [tauq, taup] + M*N [U]
1326* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork]
1327* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here
1328*
1329 nrwork = irvt
1330 DO 30 i = 1, m, ldwrku
1331 chunk = min( m-i+1, ldwrku )
1332 CALL zlacrm( chunk, n, a( i, 1 ), lda,
1333 $ rwork( iru ), n, work( iu ), ldwrku,
1334 $ rwork( nrwork ) )
1335 CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
1336 $ a( i, 1 ), lda )
1337 30 CONTINUE
1338 END IF
1339*
1340 ELSE IF( wntqs ) THEN
1341*
1342* Path 6s (M >= N, JOBZ='S')
1343* Perform bidiagonal SVD, computing left singular vectors
1344* of bidiagonal matrix in RWORK(IRU) and computing right
1345* singular vectors of bidiagonal matrix in RWORK(IRVT)
1346* CWorkspace: need 0
1347* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1348*
1349 iru = nrwork
1350 irvt = iru + n*n
1351 nrwork = irvt + n*n
1352 CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1353 $ n, rwork( irvt ), n, dum, idum,
1354 $ rwork( nrwork ), iwork, info )
1355*
1356* Copy real matrix RWORK(IRU) to complex matrix U
1357* Overwrite U by left singular vectors of A
1358* CWorkspace: need 2*N [tauq, taup] + N [work]
1359* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1360* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1361*
1362 CALL zlaset( 'F', m, n, czero, czero, u, ldu )
1363 CALL zlacp2( 'F', n, n, rwork( iru ), n, u, ldu )
1364 CALL zunmbr( 'Q', 'L', 'N', m, n, n, a, lda,
1365 $ work( itauq ), u, ldu, work( nwork ),
1366 $ lwork-nwork+1, ierr )
1367*
1368* Copy real matrix RWORK(IRVT) to complex matrix VT
1369* Overwrite VT by right singular vectors of A
1370* CWorkspace: need 2*N [tauq, taup] + N [work]
1371* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1372* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1373*
1374 CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
1375 CALL zunmbr( 'P', 'R', 'C', n, n, n, a, lda,
1376 $ work( itaup ), vt, ldvt, work( nwork ),
1377 $ lwork-nwork+1, ierr )
1378 ELSE
1379*
1380* Path 6a (M >= N, JOBZ='A')
1381* Perform bidiagonal SVD, computing left singular vectors
1382* of bidiagonal matrix in RWORK(IRU) and computing right
1383* singular vectors of bidiagonal matrix in RWORK(IRVT)
1384* CWorkspace: need 0
1385* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1386*
1387 iru = nrwork
1388 irvt = iru + n*n
1389 nrwork = irvt + n*n
1390 CALL dbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1391 $ n, rwork( irvt ), n, dum, idum,
1392 $ rwork( nrwork ), iwork, info )
1393*
1394* Set the right corner of U to identity matrix
1395*
1396 CALL zlaset( 'F', m, m, czero, czero, u, ldu )
1397 IF( m.GT.n ) THEN
1398 CALL zlaset( 'F', m-n, m-n, czero, cone,
1399 $ u( n+1, n+1 ), ldu )
1400 END IF
1401*
1402* Copy real matrix RWORK(IRU) to complex matrix U
1403* Overwrite U by left singular vectors of A
1404* CWorkspace: need 2*N [tauq, taup] + M [work]
1405* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work]
1406* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1407*
1408 CALL zlacp2( 'F', n, n, rwork( iru ), n, u, ldu )
1409 CALL zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
1410 $ work( itauq ), u, ldu, work( nwork ),
1411 $ lwork-nwork+1, ierr )
1412*
1413* Copy real matrix RWORK(IRVT) to complex matrix VT
1414* Overwrite VT by right singular vectors of A
1415* CWorkspace: need 2*N [tauq, taup] + N [work]
1416* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1417* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1418*
1419 CALL zlacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
1420 CALL zunmbr( 'P', 'R', 'C', n, n, n, a, lda,
1421 $ work( itaup ), vt, ldvt, work( nwork ),
1422 $ lwork-nwork+1, ierr )
1423 END IF
1424*
1425 END IF
1426*
1427 ELSE
1428*
1429* A has more columns than rows. If A has sufficiently more
1430* columns than rows, first reduce using the LQ decomposition (if
1431* sufficient workspace available)
1432*
1433 IF( n.GE.mnthr1 ) THEN
1434*
1435 IF( wntqn ) THEN
1436*
1437* Path 1t (N >> M, JOBZ='N')
1438* No singular vectors to be computed
1439*
1440 itau = 1
1441 nwork = itau + m
1442*
1443* Compute A=L*Q
1444* CWorkspace: need M [tau] + M [work]
1445* CWorkspace: prefer M [tau] + M*NB [work]
1446* RWorkspace: need 0
1447*
1448 CALL zgelqf( m, n, a, lda, work( itau ), work( nwork ),
1449 $ lwork-nwork+1, ierr )
1450*
1451* Zero out above L
1452*
1453 CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
1454 $ lda )
1455 ie = 1
1456 itauq = 1
1457 itaup = itauq + m
1458 nwork = itaup + m
1459*
1460* Bidiagonalize L in A
1461* CWorkspace: need 2*M [tauq, taup] + M [work]
1462* CWorkspace: prefer 2*M [tauq, taup] + 2*M*NB [work]
1463* RWorkspace: need M [e]
1464*
1465 CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
1466 $ work( itaup ), work( nwork ), lwork-nwork+1,
1467 $ ierr )
1468 nrwork = ie + m
1469*
1470* Perform bidiagonal SVD, compute singular values only
1471* CWorkspace: need 0
1472* RWorkspace: need M [e] + BDSPAC
1473*
1474 CALL dbdsdc( 'U', 'N', m, s, rwork( ie ), dum,1,dum,1,
1475 $ dum, idum, rwork( nrwork ), iwork, info )
1476*
1477 ELSE IF( wntqo ) THEN
1478*
1479* Path 2t (N >> M, JOBZ='O')
1480* M right singular vectors to be overwritten on A and
1481* M left singular vectors to be computed in U
1482*
1483 ivt = 1
1484 ldwkvt = m
1485*
1486* WORK(IVT) is M by M
1487*
1488 il = ivt + ldwkvt*m
1489 IF( lwork .GE. m*n + m*m + 3*m ) THEN
1490*
1491* WORK(IL) M by N
1492*
1493 ldwrkl = m
1494 chunk = n
1495 ELSE
1496*
1497* WORK(IL) is M by CHUNK
1498*
1499 ldwrkl = m
1500 chunk = ( lwork - m*m - 3*m ) / m
1501 END IF
1502 itau = il + ldwrkl*chunk
1503 nwork = itau + m
1504*
1505* Compute A=L*Q
1506* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1507* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1508* RWorkspace: need 0
1509*
1510 CALL zgelqf( m, n, a, lda, work( itau ), work( nwork ),
1511 $ lwork-nwork+1, ierr )
1512*
1513* Copy L to WORK(IL), zeroing about above it
1514*
1515 CALL zlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1516 CALL zlaset( 'U', m-1, m-1, czero, czero,
1517 $ work( il+ldwrkl ), ldwrkl )
1518*
1519* Generate Q in A
1520* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1521* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1522* RWorkspace: need 0
1523*
1524 CALL zunglq( m, n, m, a, lda, work( itau ),
1525 $ work( nwork ), lwork-nwork+1, ierr )
1526 ie = 1
1527 itauq = itau
1528 itaup = itauq + m
1529 nwork = itaup + m
1530*
1531* Bidiagonalize L in WORK(IL)
1532* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work]
1533* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + 2*M*NB [work]
1534* RWorkspace: need M [e]
1535*
1536 CALL zgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),
1537 $ work( itauq ), work( itaup ), work( nwork ),
1538 $ lwork-nwork+1, ierr )
1539*
1540* Perform bidiagonal SVD, computing left singular vectors
1541* of bidiagonal matrix in RWORK(IRU) and computing right
1542* singular vectors of bidiagonal matrix in RWORK(IRVT)
1543* CWorkspace: need 0
1544* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC
1545*
1546 iru = ie + m
1547 irvt = iru + m*m
1548 nrwork = irvt + m*m
1549 CALL dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),
1550 $ m, rwork( irvt ), m, dum, idum,
1551 $ rwork( nrwork ), iwork, info )
1552*
1553* Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
1554* Overwrite WORK(IU) by the left singular vectors of L
1555* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work]
1556* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work]
1557* RWorkspace: need 0
1558*
1559 CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1560 CALL zunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1561 $ work( itauq ), u, ldu, work( nwork ),
1562 $ lwork-nwork+1, ierr )
1563*
1564* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
1565* Overwrite WORK(IVT) by the right singular vectors of L
1566* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work]
1567* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work]
1568* RWorkspace: need 0
1569*
1570 CALL zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),
1571 $ ldwkvt )
1572 CALL zunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,
1573 $ work( itaup ), work( ivt ), ldwkvt,
1574 $ work( nwork ), lwork-nwork+1, ierr )
1575*
1576* Multiply right singular vectors of L in WORK(IL) by Q
1577* in A, storing result in WORK(IL) and copying to A
1578* CWorkspace: need M*M [VT] + M*M [L]
1579* CWorkspace: prefer M*M [VT] + M*N [L]
1580* RWorkspace: need 0
1581*
1582 DO 40 i = 1, n, chunk
1583 blk = min( n-i+1, chunk )
1584 CALL zgemm( 'N', 'N', m, blk, m, cone, work( ivt ), m,
1585 $ a( 1, i ), lda, czero, work( il ),
1586 $ ldwrkl )
1587 CALL zlacpy( 'F', m, blk, work( il ), ldwrkl,
1588 $ a( 1, i ), lda )
1589 40 CONTINUE
1590*
1591 ELSE IF( wntqs ) THEN
1592*
1593* Path 3t (N >> M, JOBZ='S')
1594* M right singular vectors to be computed in VT and
1595* M left singular vectors to be computed in U
1596*
1597 il = 1
1598*
1599* WORK(IL) is M by M
1600*
1601 ldwrkl = m
1602 itau = il + ldwrkl*m
1603 nwork = itau + m
1604*
1605* Compute A=L*Q
1606* CWorkspace: need M*M [L] + M [tau] + M [work]
1607* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work]
1608* RWorkspace: need 0
1609*
1610 CALL zgelqf( m, n, a, lda, work( itau ), work( nwork ),
1611 $ lwork-nwork+1, ierr )
1612*
1613* Copy L to WORK(IL), zeroing out above it
1614*
1615 CALL zlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1616 CALL zlaset( 'U', m-1, m-1, czero, czero,
1617 $ work( il+ldwrkl ), ldwrkl )
1618*
1619* Generate Q in A
1620* CWorkspace: need M*M [L] + M [tau] + M [work]
1621* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work]
1622* RWorkspace: need 0
1623*
1624 CALL zunglq( m, n, m, a, lda, work( itau ),
1625 $ work( nwork ), lwork-nwork+1, ierr )
1626 ie = 1
1627 itauq = itau
1628 itaup = itauq + m
1629 nwork = itaup + m
1630*
1631* Bidiagonalize L in WORK(IL)
1632* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work]
1633* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + 2*M*NB [work]
1634* RWorkspace: need M [e]
1635*
1636 CALL zgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),
1637 $ work( itauq ), work( itaup ), work( nwork ),
1638 $ lwork-nwork+1, ierr )
1639*
1640* Perform bidiagonal SVD, computing left singular vectors
1641* of bidiagonal matrix in RWORK(IRU) and computing right
1642* singular vectors of bidiagonal matrix in RWORK(IRVT)
1643* CWorkspace: need 0
1644* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC
1645*
1646 iru = ie + m
1647 irvt = iru + m*m
1648 nrwork = irvt + m*m
1649 CALL dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),
1650 $ m, rwork( irvt ), m, dum, idum,
1651 $ rwork( nrwork ), iwork, info )
1652*
1653* Copy real matrix RWORK(IRU) to complex matrix U
1654* Overwrite U by left singular vectors of L
1655* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work]
1656* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work]
1657* RWorkspace: need 0
1658*
1659 CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1660 CALL zunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1661 $ work( itauq ), u, ldu, work( nwork ),
1662 $ lwork-nwork+1, ierr )
1663*
1664* Copy real matrix RWORK(IRVT) to complex matrix VT
1665* Overwrite VT by left singular vectors of L
1666* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work]
1667* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work]
1668* RWorkspace: need 0
1669*
1670 CALL zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt )
1671 CALL zunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,
1672 $ work( itaup ), vt, ldvt, work( nwork ),
1673 $ lwork-nwork+1, ierr )
1674*
1675* Copy VT to WORK(IL), multiply right singular vectors of L
1676* in WORK(IL) by Q in A, storing result in VT
1677* CWorkspace: need M*M [L]
1678* RWorkspace: need 0
1679*
1680 CALL zlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
1681 CALL zgemm( 'N', 'N', m, n, m, cone, work( il ), ldwrkl,
1682 $ a, lda, czero, vt, ldvt )
1683*
1684 ELSE IF( wntqa ) THEN
1685*
1686* Path 4t (N >> M, JOBZ='A')
1687* N right singular vectors to be computed in VT and
1688* M left singular vectors to be computed in U
1689*
1690 ivt = 1
1691*
1692* WORK(IVT) is M by M
1693*
1694 ldwkvt = m
1695 itau = ivt + ldwkvt*m
1696 nwork = itau + m
1697*
1698* Compute A=L*Q, copying result to VT
1699* CWorkspace: need M*M [VT] + M [tau] + M [work]
1700* CWorkspace: prefer M*M [VT] + M [tau] + M*NB [work]
1701* RWorkspace: need 0
1702*
1703 CALL zgelqf( m, n, a, lda, work( itau ), work( nwork ),
1704 $ lwork-nwork+1, ierr )
1705 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
1706*
1707* Generate Q in VT
1708* CWorkspace: need M*M [VT] + M [tau] + N [work]
1709* CWorkspace: prefer M*M [VT] + M [tau] + N*NB [work]
1710* RWorkspace: need 0
1711*
1712 CALL zunglq( n, n, m, vt, ldvt, work( itau ),
1713 $ work( nwork ), lwork-nwork+1, ierr )
1714*
1715* Produce L in A, zeroing out above it
1716*
1717 CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
1718 $ lda )
1719 ie = 1
1720 itauq = itau
1721 itaup = itauq + m
1722 nwork = itaup + m
1723*
1724* Bidiagonalize L in A
1725* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work]
1726* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + 2*M*NB [work]
1727* RWorkspace: need M [e]
1728*
1729 CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
1730 $ work( itaup ), work( nwork ), lwork-nwork+1,
1731 $ ierr )
1732*
1733* Perform bidiagonal SVD, computing left singular vectors
1734* of bidiagonal matrix in RWORK(IRU) and computing right
1735* singular vectors of bidiagonal matrix in RWORK(IRVT)
1736* CWorkspace: need 0
1737* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC
1738*
1739 iru = ie + m
1740 irvt = iru + m*m
1741 nrwork = irvt + m*m
1742 CALL dbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),
1743 $ m, rwork( irvt ), m, dum, idum,
1744 $ rwork( nrwork ), iwork, info )
1745*
1746* Copy real matrix RWORK(IRU) to complex matrix U
1747* Overwrite U by left singular vectors of L
1748* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work]
1749* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work]
1750* RWorkspace: need 0
1751*
1752 CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1753 CALL zunmbr( 'Q', 'L', 'N', m, m, m, a, lda,
1754 $ work( itauq ), u, ldu, work( nwork ),
1755 $ lwork-nwork+1, ierr )
1756*
1757* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
1758* Overwrite WORK(IVT) by right singular vectors of L
1759* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work]
1760* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work]
1761* RWorkspace: need 0
1762*
1763 CALL zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),
1764 $ ldwkvt )
1765 CALL zunmbr( 'P', 'R', 'C', m, m, m, a, lda,
1766 $ work( itaup ), work( ivt ), ldwkvt,
1767 $ work( nwork ), lwork-nwork+1, ierr )
1768*
1769* Multiply right singular vectors of L in WORK(IVT) by
1770* Q in VT, storing result in A
1771* CWorkspace: need M*M [VT]
1772* RWorkspace: need 0
1773*
1774 CALL zgemm( 'N', 'N', m, n, m, cone, work( ivt ), ldwkvt,
1775 $ vt, ldvt, czero, a, lda )
1776*
1777* Copy right singular vectors of A from A to VT
1778*
1779 CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
1780*
1781 END IF
1782*
1783 ELSE IF( n.GE.mnthr2 ) THEN
1784*
1785* MNTHR2 <= N < MNTHR1
1786*
1787* Path 5t (N >> M, but not as much as MNTHR1)
1788* Reduce to bidiagonal form without QR decomposition, use
1789* ZUNGBR and matrix multiplication to compute singular vectors
1790*
1791 ie = 1
1792 nrwork = ie + m
1793 itauq = 1
1794 itaup = itauq + m
1795 nwork = itaup + m
1796*
1797* Bidiagonalize A
1798* CWorkspace: need 2*M [tauq, taup] + N [work]
1799* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work]
1800* RWorkspace: need M [e]
1801*
1802 CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
1803 $ work( itaup ), work( nwork ), lwork-nwork+1,
1804 $ ierr )
1805*
1806 IF( wntqn ) THEN
1807*
1808* Path 5tn (N >> M, JOBZ='N')
1809* Compute singular values only
1810* CWorkspace: need 0
1811* RWorkspace: need M [e] + BDSPAC
1812*
1813 CALL dbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,
1814 $ dum, idum, rwork( nrwork ), iwork, info )
1815 ELSE IF( wntqo ) THEN
1816 irvt = nrwork
1817 iru = irvt + m*m
1818 nrwork = iru + m*m
1819 ivt = nwork
1820*
1821* Path 5to (N >> M, JOBZ='O')
1822* Copy A to U, generate Q
1823* CWorkspace: need 2*M [tauq, taup] + M [work]
1824* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1825* RWorkspace: need 0
1826*
1827 CALL zlacpy( 'L', m, m, a, lda, u, ldu )
1828 CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1829 $ work( nwork ), lwork-nwork+1, ierr )
1830*
1831* Generate P**H in A
1832* CWorkspace: need 2*M [tauq, taup] + M [work]
1833* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1834* RWorkspace: need 0
1835*
1836 CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
1837 $ work( nwork ), lwork-nwork+1, ierr )
1838*
1839 ldwkvt = m
1840 IF( lwork .GE. m*n + 3*m ) THEN
1841*
1842* WORK( IVT ) is M by N
1843*
1844 nwork = ivt + ldwkvt*n
1845 chunk = n
1846 ELSE
1847*
1848* WORK( IVT ) is M by CHUNK
1849*
1850 chunk = ( lwork - 3*m ) / m
1851 nwork = ivt + ldwkvt*chunk
1852 END IF
1853*
1854* Perform bidiagonal SVD, computing left singular vectors
1855* of bidiagonal matrix in RWORK(IRU) and computing right
1856* singular vectors of bidiagonal matrix in RWORK(IRVT)
1857* CWorkspace: need 0
1858* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
1859*
1860 CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1861 $ m, rwork( irvt ), m, dum, idum,
1862 $ rwork( nrwork ), iwork, info )
1863*
1864* Multiply Q in U by real matrix RWORK(IRVT)
1865* storing the result in WORK(IVT), copying to U
1866* CWorkspace: need 2*M [tauq, taup] + M*M [VT]
1867* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork]
1868*
1869 CALL zlacrm( m, m, u, ldu, rwork( iru ), m, work( ivt ),
1870 $ ldwkvt, rwork( nrwork ) )
1871 CALL zlacpy( 'F', m, m, work( ivt ), ldwkvt, u, ldu )
1872*
1873* Multiply RWORK(IRVT) by P**H in A, storing the
1874* result in WORK(IVT), copying to A
1875* CWorkspace: need 2*M [tauq, taup] + M*M [VT]
1876* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT]
1877* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork]
1878* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here
1879*
1880 nrwork = iru
1881 DO 50 i = 1, n, chunk
1882 blk = min( n-i+1, chunk )
1883 CALL zlarcm( m, blk, rwork( irvt ), m, a( 1, i ), lda,
1884 $ work( ivt ), ldwkvt, rwork( nrwork ) )
1885 CALL zlacpy( 'F', m, blk, work( ivt ), ldwkvt,
1886 $ a( 1, i ), lda )
1887 50 CONTINUE
1888 ELSE IF( wntqs ) THEN
1889*
1890* Path 5ts (N >> M, JOBZ='S')
1891* Copy A to U, generate Q
1892* CWorkspace: need 2*M [tauq, taup] + M [work]
1893* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1894* RWorkspace: need 0
1895*
1896 CALL zlacpy( 'L', m, m, a, lda, u, ldu )
1897 CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1898 $ work( nwork ), lwork-nwork+1, ierr )
1899*
1900* Copy A to VT, generate P**H
1901* CWorkspace: need 2*M [tauq, taup] + M [work]
1902* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1903* RWorkspace: need 0
1904*
1905 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
1906 CALL zungbr( 'P', m, n, m, vt, ldvt, work( itaup ),
1907 $ work( nwork ), lwork-nwork+1, ierr )
1908*
1909* Perform bidiagonal SVD, computing left singular vectors
1910* of bidiagonal matrix in RWORK(IRU) and computing right
1911* singular vectors of bidiagonal matrix in RWORK(IRVT)
1912* CWorkspace: need 0
1913* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
1914*
1915 irvt = nrwork
1916 iru = irvt + m*m
1917 nrwork = iru + m*m
1918 CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1919 $ m, rwork( irvt ), m, dum, idum,
1920 $ rwork( nrwork ), iwork, info )
1921*
1922* Multiply Q in U by real matrix RWORK(IRU), storing the
1923* result in A, copying to U
1924* CWorkspace: need 0
1925* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork]
1926*
1927 CALL zlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,
1928 $ rwork( nrwork ) )
1929 CALL zlacpy( 'F', m, m, a, lda, u, ldu )
1930*
1931* Multiply real matrix RWORK(IRVT) by P**H in VT,
1932* storing the result in A, copying to VT
1933* CWorkspace: need 0
1934* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here
1935*
1936 nrwork = iru
1937 CALL zlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,
1938 $ rwork( nrwork ) )
1939 CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
1940 ELSE
1941*
1942* Path 5ta (N >> M, JOBZ='A')
1943* Copy A to U, generate Q
1944* CWorkspace: need 2*M [tauq, taup] + M [work]
1945* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1946* RWorkspace: need 0
1947*
1948 CALL zlacpy( 'L', m, m, a, lda, u, ldu )
1949 CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1950 $ work( nwork ), lwork-nwork+1, ierr )
1951*
1952* Copy A to VT, generate P**H
1953* CWorkspace: need 2*M [tauq, taup] + N [work]
1954* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work]
1955* RWorkspace: need 0
1956*
1957 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
1958 CALL zungbr( 'P', n, n, m, vt, ldvt, work( itaup ),
1959 $ work( nwork ), lwork-nwork+1, ierr )
1960*
1961* Perform bidiagonal SVD, computing left singular vectors
1962* of bidiagonal matrix in RWORK(IRU) and computing right
1963* singular vectors of bidiagonal matrix in RWORK(IRVT)
1964* CWorkspace: need 0
1965* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
1966*
1967 irvt = nrwork
1968 iru = irvt + m*m
1969 nrwork = iru + m*m
1970 CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1971 $ m, rwork( irvt ), m, dum, idum,
1972 $ rwork( nrwork ), iwork, info )
1973*
1974* Multiply Q in U by real matrix RWORK(IRU), storing the
1975* result in A, copying to U
1976* CWorkspace: need 0
1977* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork]
1978*
1979 CALL zlacrm( m, m, u, ldu, rwork( iru ), m, a, lda,
1980 $ rwork( nrwork ) )
1981 CALL zlacpy( 'F', m, m, a, lda, u, ldu )
1982*
1983* Multiply real matrix RWORK(IRVT) by P**H in VT,
1984* storing the result in A, copying to VT
1985* CWorkspace: need 0
1986* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here
1987*
1988 nrwork = iru
1989 CALL zlarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,
1990 $ rwork( nrwork ) )
1991 CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
1992 END IF
1993*
1994 ELSE
1995*
1996* N .LT. MNTHR2
1997*
1998* Path 6t (N > M, but not much larger)
1999* Reduce to bidiagonal form without LQ decomposition
2000* Use ZUNMBR to compute singular vectors
2001*
2002 ie = 1
2003 nrwork = ie + m
2004 itauq = 1
2005 itaup = itauq + m
2006 nwork = itaup + m
2007*
2008* Bidiagonalize A
2009* CWorkspace: need 2*M [tauq, taup] + N [work]
2010* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work]
2011* RWorkspace: need M [e]
2012*
2013 CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2014 $ work( itaup ), work( nwork ), lwork-nwork+1,
2015 $ ierr )
2016 IF( wntqn ) THEN
2017*
2018* Path 6tn (N > M, JOBZ='N')
2019* Compute singular values only
2020* CWorkspace: need 0
2021* RWorkspace: need M [e] + BDSPAC
2022*
2023 CALL dbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,
2024 $ dum, idum, rwork( nrwork ), iwork, info )
2025 ELSE IF( wntqo ) THEN
2026* Path 6to (N > M, JOBZ='O')
2027 ldwkvt = m
2028 ivt = nwork
2029 IF( lwork .GE. m*n + 3*m ) THEN
2030*
2031* WORK( IVT ) is M by N
2032*
2033 CALL zlaset( 'F', m, n, czero, czero, work( ivt ),
2034 $ ldwkvt )
2035 nwork = ivt + ldwkvt*n
2036 ELSE
2037*
2038* WORK( IVT ) is M by CHUNK
2039*
2040 chunk = ( lwork - 3*m ) / m
2041 nwork = ivt + ldwkvt*chunk
2042 END IF
2043*
2044* Perform bidiagonal SVD, computing left singular vectors
2045* of bidiagonal matrix in RWORK(IRU) and computing right
2046* singular vectors of bidiagonal matrix in RWORK(IRVT)
2047* CWorkspace: need 0
2048* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
2049*
2050 irvt = nrwork
2051 iru = irvt + m*m
2052 nrwork = iru + m*m
2053 CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
2054 $ m, rwork( irvt ), m, dum, idum,
2055 $ rwork( nrwork ), iwork, info )
2056*
2057* Copy real matrix RWORK(IRU) to complex matrix U
2058* Overwrite U by left singular vectors of A
2059* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work]
2060* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work]
2061* RWorkspace: need M [e] + M*M [RVT] + M*M [RU]
2062*
2063 CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
2064 CALL zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
2065 $ work( itauq ), u, ldu, work( nwork ),
2066 $ lwork-nwork+1, ierr )
2067*
2068 IF( lwork .GE. m*n + 3*m ) THEN
2069*
2070* Path 6to-fast
2071* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
2072* Overwrite WORK(IVT) by right singular vectors of A,
2073* copying to A
2074* CWorkspace: need 2*M [tauq, taup] + M*N [VT] + M [work]
2075* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] + M*NB [work]
2076* RWorkspace: need M [e] + M*M [RVT]
2077*
2078 CALL zlacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),
2079 $ ldwkvt )
2080 CALL zunmbr( 'P', 'R', 'C', m, n, m, a, lda,
2081 $ work( itaup ), work( ivt ), ldwkvt,
2082 $ work( nwork ), lwork-nwork+1, ierr )
2083 CALL zlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )
2084 ELSE
2085*
2086* Path 6to-slow
2087* Generate P**H in A
2088* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work]
2089* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work]
2090* RWorkspace: need 0
2091*
2092 CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
2093 $ work( nwork ), lwork-nwork+1, ierr )
2094*
2095* Multiply Q in A by real matrix RWORK(IRU), storing the
2096* result in WORK(IU), copying to A
2097* CWorkspace: need 2*M [tauq, taup] + M*M [VT]
2098* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT]
2099* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork]
2100* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here
2101*
2102 nrwork = iru
2103 DO 60 i = 1, n, chunk
2104 blk = min( n-i+1, chunk )
2105 CALL zlarcm( m, blk, rwork( irvt ), m, a( 1, i ),
2106 $ lda, work( ivt ), ldwkvt,
2107 $ rwork( nrwork ) )
2108 CALL zlacpy( 'F', m, blk, work( ivt ), ldwkvt,
2109 $ a( 1, i ), lda )
2110 60 CONTINUE
2111 END IF
2112 ELSE IF( wntqs ) THEN
2113*
2114* Path 6ts (N > M, JOBZ='S')
2115* Perform bidiagonal SVD, computing left singular vectors
2116* of bidiagonal matrix in RWORK(IRU) and computing right
2117* singular vectors of bidiagonal matrix in RWORK(IRVT)
2118* CWorkspace: need 0
2119* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
2120*
2121 irvt = nrwork
2122 iru = irvt + m*m
2123 nrwork = iru + m*m
2124 CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
2125 $ m, rwork( irvt ), m, dum, idum,
2126 $ rwork( nrwork ), iwork, info )
2127*
2128* Copy real matrix RWORK(IRU) to complex matrix U
2129* Overwrite U by left singular vectors of A
2130* CWorkspace: need 2*M [tauq, taup] + M [work]
2131* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
2132* RWorkspace: need M [e] + M*M [RVT] + M*M [RU]
2133*
2134 CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
2135 CALL zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
2136 $ work( itauq ), u, ldu, work( nwork ),
2137 $ lwork-nwork+1, ierr )
2138*
2139* Copy real matrix RWORK(IRVT) to complex matrix VT
2140* Overwrite VT by right singular vectors of A
2141* CWorkspace: need 2*M [tauq, taup] + M [work]
2142* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
2143* RWorkspace: need M [e] + M*M [RVT]
2144*
2145 CALL zlaset( 'F', m, n, czero, czero, vt, ldvt )
2146 CALL zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt )
2147 CALL zunmbr( 'P', 'R', 'C', m, n, m, a, lda,
2148 $ work( itaup ), vt, ldvt, work( nwork ),
2149 $ lwork-nwork+1, ierr )
2150 ELSE
2151*
2152* Path 6ta (N > M, JOBZ='A')
2153* Perform bidiagonal SVD, computing left singular vectors
2154* of bidiagonal matrix in RWORK(IRU) and computing right
2155* singular vectors of bidiagonal matrix in RWORK(IRVT)
2156* CWorkspace: need 0
2157* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
2158*
2159 irvt = nrwork
2160 iru = irvt + m*m
2161 nrwork = iru + m*m
2162*
2163 CALL dbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
2164 $ m, rwork( irvt ), m, dum, idum,
2165 $ rwork( nrwork ), iwork, info )
2166*
2167* Copy real matrix RWORK(IRU) to complex matrix U
2168* Overwrite U by left singular vectors of A
2169* CWorkspace: need 2*M [tauq, taup] + M [work]
2170* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
2171* RWorkspace: need M [e] + M*M [RVT] + M*M [RU]
2172*
2173 CALL zlacp2( 'F', m, m, rwork( iru ), m, u, ldu )
2174 CALL zunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
2175 $ work( itauq ), u, ldu, work( nwork ),
2176 $ lwork-nwork+1, ierr )
2177*
2178* Set all of VT to identity matrix
2179*
2180 CALL zlaset( 'F', n, n, czero, cone, vt, ldvt )
2181*
2182* Copy real matrix RWORK(IRVT) to complex matrix VT
2183* Overwrite VT by right singular vectors of A
2184* CWorkspace: need 2*M [tauq, taup] + N [work]
2185* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work]
2186* RWorkspace: need M [e] + M*M [RVT]
2187*
2188 CALL zlacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt )
2189 CALL zunmbr( 'P', 'R', 'C', n, n, m, a, lda,
2190 $ work( itaup ), vt, ldvt, work( nwork ),
2191 $ lwork-nwork+1, ierr )
2192 END IF
2193*
2194 END IF
2195*
2196 END IF
2197*
2198* Undo scaling if necessary
2199*
2200 IF( iscl.EQ.1 ) THEN
2201 IF( anrm.GT.bignum )
2202 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
2203 $ ierr )
2204 IF( info.NE.0 .AND. anrm.GT.bignum )
2205 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
2206 $ rwork( ie ), minmn, ierr )
2207 IF( anrm.LT.smlnum )
2208 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
2209 $ ierr )
2210 IF( info.NE.0 .AND. anrm.LT.smlnum )
2211 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
2212 $ rwork( ie ), minmn, ierr )
2213 END IF
2214*
2215* Return optimal workspace in WORK(1)
2216*
2217 work( 1 ) = droundup_lwork( maxwrk )
2218*
2219 RETURN
2220*
2221* End of ZGESDD
2222*
double precision function droundup_lwork(lwork)
DROUNDUP_LWORK
logical function disnan(din)
DISNAN tests input for NaN.
Definition disnan.f:59
subroutine dbdsdc(uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq, work, iwork, info)
DBDSDC
Definition dbdsdc.f:205
subroutine zungbr(vect, m, n, k, a, lda, tau, work, lwork, info)
ZUNGBR
Definition zungbr.f:157
double precision function zlange(norm, m, n, a, lda, work)
ZLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition zlange.f:115
subroutine zgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
ZGEBRD
Definition zgebrd.f:205
subroutine zlacp2(uplo, m, n, a, lda, b, ldb)
ZLACP2 copies all or part of a real two-dimensional array to a complex array.
Definition zlacp2.f:104
subroutine zlacrm(m, n, a, lda, b, ldb, c, ldc, rwork)
ZLACRM multiplies a complex matrix by a square real matrix.
Definition zlacrm.f:114
subroutine zlarcm(m, n, a, lda, b, ldb, c, ldc, rwork)
ZLARCM copies all or part of a real two-dimensional array to a complex array.
Definition zlarcm.f:114
subroutine zunmbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
ZUNMBR
Definition zunmbr.f:196
subroutine zunglq(m, n, k, a, lda, tau, work, lwork, info)
ZUNGLQ
Definition zunglq.f:127
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:187

◆ zgesvd()

subroutine zgesvd ( character jobu,
character jobvt,
integer m,
integer n,
complex*16, dimension( lda, * ) a,
integer lda,
double precision, dimension( * ) s,
complex*16, dimension( ldu, * ) u,
integer ldu,
complex*16, dimension( ldvt, * ) vt,
integer ldvt,
complex*16, dimension( * ) work,
integer lwork,
double precision, dimension( * ) rwork,
integer info )

ZGESVD computes the singular value decomposition (SVD) for GE matrices

Download ZGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> ZGESVD computes the singular value decomposition (SVD) of a complex
!> M-by-N matrix A, optionally computing the left and/or right singular
!> vectors. The SVD is written
!>
!>      A = U * SIGMA * conjugate-transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
!> V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns V**H, not V.
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'A':  all M columns of U are returned in array U:
!>          = 'S':  the first min(m,n) columns of U (the left singular
!>                  vectors) are returned in the array U;
!>          = 'O':  the first min(m,n) columns of U (the left singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no columns of U (no left singular vectors) are
!>                  computed.
!> 
[in]JOBVT
!>          JOBVT is CHARACTER*1
!>          Specifies options for computing all or part of the matrix
!>          V**H:
!>          = 'A':  all N rows of V**H are returned in the array VT;
!>          = 'S':  the first min(m,n) rows of V**H (the right singular
!>                  vectors) are returned in the array VT;
!>          = 'O':  the first min(m,n) rows of V**H (the right singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no rows of V**H (no right singular vectors) are
!>                  computed.
!>
!>          JOBVT and JOBU cannot both be 'O'.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is COMPLEX*16 array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit,
!>          if JOBU = 'O',  A is overwritten with the first min(m,n)
!>                          columns of U (the left singular vectors,
!>                          stored columnwise);
!>          if JOBVT = 'O', A is overwritten with the first min(m,n)
!>                          rows of V**H (the right singular vectors,
!>                          stored rowwise);
!>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
!>                          are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is DOUBLE PRECISION array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is COMPLEX*16 array, dimension (LDU,UCOL)
!>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
!>          If JOBU = 'A', U contains the M-by-M unitary matrix U;
!>          if JOBU = 'S', U contains the first min(m,n) columns of U
!>          (the left singular vectors, stored columnwise);
!>          if JOBU = 'N' or 'O', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBU = 'S' or 'A', LDU >= M.
!> 
[out]VT
!>          VT is COMPLEX*16 array, dimension (LDVT,N)
!>          If JOBVT = 'A', VT contains the N-by-N unitary matrix
!>          V**H;
!>          if JOBVT = 'S', VT contains the first min(m,n) rows of
!>          V**H (the right singular vectors, stored rowwise);
!>          if JOBVT = 'N' or 'O', VT is not referenced.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1; if
!>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
!> 
[out]WORK
!>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          LWORK >=  MAX(1,2*MIN(M,N)+MAX(M,N)).
!>          For good performance, LWORK should generally be larger.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is DOUBLE PRECISION array, dimension (5*min(M,N))
!>          On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
!>          unconverged superdiagonal elements of an upper bidiagonal
!>          matrix B whose diagonal is in S (not necessarily sorted).
!>          B satisfies A = U * B * VT, so it has the same singular
!>          values as A, and singular vectors related by U and VT.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if ZBDSQR did not converge, INFO specifies how many
!>                superdiagonals of an intermediate bidiagonal form B
!>                did not converge to zero. See the description of RWORK
!>                above for details.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 212 of file zgesvd.f.

214*
215* -- LAPACK driver routine --
216* -- LAPACK is a software package provided by Univ. of Tennessee, --
217* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
218*
219* .. Scalar Arguments ..
220 CHARACTER JOBU, JOBVT
221 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
222* ..
223* .. Array Arguments ..
224 DOUBLE PRECISION RWORK( * ), S( * )
225 COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
226 $ WORK( * )
227* ..
228*
229* =====================================================================
230*
231* .. Parameters ..
232 COMPLEX*16 CZERO, CONE
233 parameter( czero = ( 0.0d0, 0.0d0 ),
234 $ cone = ( 1.0d0, 0.0d0 ) )
235 DOUBLE PRECISION ZERO, ONE
236 parameter( zero = 0.0d0, one = 1.0d0 )
237* ..
238* .. Local Scalars ..
239 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
240 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
241 INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
242 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
243 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
244 $ NRVT, WRKBL
245 INTEGER LWORK_ZGEQRF, LWORK_ZUNGQR_N, LWORK_ZUNGQR_M,
246 $ LWORK_ZGEBRD, LWORK_ZUNGBR_P, LWORK_ZUNGBR_Q,
247 $ LWORK_ZGELQF, LWORK_ZUNGLQ_N, LWORK_ZUNGLQ_M
248 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
249* ..
250* .. Local Arrays ..
251 DOUBLE PRECISION DUM( 1 )
252 COMPLEX*16 CDUM( 1 )
253* ..
254* .. External Subroutines ..
255 EXTERNAL dlascl, xerbla, zbdsqr, zgebrd, zgelqf, zgemm,
257 $ zungqr, zunmbr
258* ..
259* .. External Functions ..
260 LOGICAL LSAME
261 INTEGER ILAENV
262 DOUBLE PRECISION DLAMCH, ZLANGE
263 EXTERNAL lsame, ilaenv, dlamch, zlange
264* ..
265* .. Intrinsic Functions ..
266 INTRINSIC max, min, sqrt
267* ..
268* .. Executable Statements ..
269*
270* Test the input arguments
271*
272 info = 0
273 minmn = min( m, n )
274 wntua = lsame( jobu, 'A' )
275 wntus = lsame( jobu, 'S' )
276 wntuas = wntua .OR. wntus
277 wntuo = lsame( jobu, 'O' )
278 wntun = lsame( jobu, 'N' )
279 wntva = lsame( jobvt, 'A' )
280 wntvs = lsame( jobvt, 'S' )
281 wntvas = wntva .OR. wntvs
282 wntvo = lsame( jobvt, 'O' )
283 wntvn = lsame( jobvt, 'N' )
284 lquery = ( lwork.EQ.-1 )
285*
286 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
287 info = -1
288 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
289 $ ( wntvo .AND. wntuo ) ) THEN
290 info = -2
291 ELSE IF( m.LT.0 ) THEN
292 info = -3
293 ELSE IF( n.LT.0 ) THEN
294 info = -4
295 ELSE IF( lda.LT.max( 1, m ) ) THEN
296 info = -6
297 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
298 info = -9
299 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
300 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
301 info = -11
302 END IF
303*
304* Compute workspace
305* (Note: Comments in the code beginning "Workspace:" describe the
306* minimal amount of workspace needed at that point in the code,
307* as well as the preferred amount for good performance.
308* CWorkspace refers to complex workspace, and RWorkspace to
309* real workspace. NB refers to the optimal block size for the
310* immediately following subroutine, as returned by ILAENV.)
311*
312 IF( info.EQ.0 ) THEN
313 minwrk = 1
314 maxwrk = 1
315 IF( m.GE.n .AND. minmn.GT.0 ) THEN
316*
317* Space needed for ZBDSQR is BDSPAC = 5*N
318*
319 mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
320* Compute space needed for ZGEQRF
321 CALL zgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
322 lwork_zgeqrf = int( cdum(1) )
323* Compute space needed for ZUNGQR
324 CALL zungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr )
325 lwork_zungqr_n = int( cdum(1) )
326 CALL zungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr )
327 lwork_zungqr_m = int( cdum(1) )
328* Compute space needed for ZGEBRD
329 CALL zgebrd( n, n, a, lda, s, dum(1), cdum(1),
330 $ cdum(1), cdum(1), -1, ierr )
331 lwork_zgebrd = int( cdum(1) )
332* Compute space needed for ZUNGBR
333 CALL zungbr( 'P', n, n, n, a, lda, cdum(1),
334 $ cdum(1), -1, ierr )
335 lwork_zungbr_p = int( cdum(1) )
336 CALL zungbr( 'Q', n, n, n, a, lda, cdum(1),
337 $ cdum(1), -1, ierr )
338 lwork_zungbr_q = int( cdum(1) )
339*
340 IF( m.GE.mnthr ) THEN
341 IF( wntun ) THEN
342*
343* Path 1 (M much larger than N, JOBU='N')
344*
345 maxwrk = n + lwork_zgeqrf
346 maxwrk = max( maxwrk, 2*n+lwork_zgebrd )
347 IF( wntvo .OR. wntvas )
348 $ maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
349 minwrk = 3*n
350 ELSE IF( wntuo .AND. wntvn ) THEN
351*
352* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
353*
354 wrkbl = n + lwork_zgeqrf
355 wrkbl = max( wrkbl, n+lwork_zungqr_n )
356 wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
357 wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
358 maxwrk = max( n*n+wrkbl, n*n+m*n )
359 minwrk = 2*n + m
360 ELSE IF( wntuo .AND. wntvas ) THEN
361*
362* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
363* 'A')
364*
365 wrkbl = n + lwork_zgeqrf
366 wrkbl = max( wrkbl, n+lwork_zungqr_n )
367 wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
368 wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
369 wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
370 maxwrk = max( n*n+wrkbl, n*n+m*n )
371 minwrk = 2*n + m
372 ELSE IF( wntus .AND. wntvn ) THEN
373*
374* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
375*
376 wrkbl = n + lwork_zgeqrf
377 wrkbl = max( wrkbl, n+lwork_zungqr_n )
378 wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
379 wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
380 maxwrk = n*n + wrkbl
381 minwrk = 2*n + m
382 ELSE IF( wntus .AND. wntvo ) THEN
383*
384* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385*
386 wrkbl = n + lwork_zgeqrf
387 wrkbl = max( wrkbl, n+lwork_zungqr_n )
388 wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
389 wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
390 wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
391 maxwrk = 2*n*n + wrkbl
392 minwrk = 2*n + m
393 ELSE IF( wntus .AND. wntvas ) THEN
394*
395* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
396* 'A')
397*
398 wrkbl = n + lwork_zgeqrf
399 wrkbl = max( wrkbl, n+lwork_zungqr_n )
400 wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
401 wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
402 wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
403 maxwrk = n*n + wrkbl
404 minwrk = 2*n + m
405 ELSE IF( wntua .AND. wntvn ) THEN
406*
407* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
408*
409 wrkbl = n + lwork_zgeqrf
410 wrkbl = max( wrkbl, n+lwork_zungqr_m )
411 wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
412 wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
413 maxwrk = n*n + wrkbl
414 minwrk = 2*n + m
415 ELSE IF( wntua .AND. wntvo ) THEN
416*
417* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418*
419 wrkbl = n + lwork_zgeqrf
420 wrkbl = max( wrkbl, n+lwork_zungqr_m )
421 wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
422 wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
423 wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
424 maxwrk = 2*n*n + wrkbl
425 minwrk = 2*n + m
426 ELSE IF( wntua .AND. wntvas ) THEN
427*
428* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
429* 'A')
430*
431 wrkbl = n + lwork_zgeqrf
432 wrkbl = max( wrkbl, n+lwork_zungqr_m )
433 wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
434 wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
435 wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
436 maxwrk = n*n + wrkbl
437 minwrk = 2*n + m
438 END IF
439 ELSE
440*
441* Path 10 (M at least N, but not much larger)
442*
443 CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
444 $ cdum(1), cdum(1), -1, ierr )
445 lwork_zgebrd = int( cdum(1) )
446 maxwrk = 2*n + lwork_zgebrd
447 IF( wntus .OR. wntuo ) THEN
448 CALL zungbr( 'Q', m, n, n, a, lda, cdum(1),
449 $ cdum(1), -1, ierr )
450 lwork_zungbr_q = int( cdum(1) )
451 maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
452 END IF
453 IF( wntua ) THEN
454 CALL zungbr( 'Q', m, m, n, a, lda, cdum(1),
455 $ cdum(1), -1, ierr )
456 lwork_zungbr_q = int( cdum(1) )
457 maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
458 END IF
459 IF( .NOT.wntvn ) THEN
460 maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
461 END IF
462 minwrk = 2*n + m
463 END IF
464 ELSE IF( minmn.GT.0 ) THEN
465*
466* Space needed for ZBDSQR is BDSPAC = 5*M
467*
468 mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
469* Compute space needed for ZGELQF
470 CALL zgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
471 lwork_zgelqf = int( cdum(1) )
472* Compute space needed for ZUNGLQ
473 CALL zunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
474 $ ierr )
475 lwork_zunglq_n = int( cdum(1) )
476 CALL zunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr )
477 lwork_zunglq_m = int( cdum(1) )
478* Compute space needed for ZGEBRD
479 CALL zgebrd( m, m, a, lda, s, dum(1), cdum(1),
480 $ cdum(1), cdum(1), -1, ierr )
481 lwork_zgebrd = int( cdum(1) )
482* Compute space needed for ZUNGBR P
483 CALL zungbr( 'P', m, m, m, a, n, cdum(1),
484 $ cdum(1), -1, ierr )
485 lwork_zungbr_p = int( cdum(1) )
486* Compute space needed for ZUNGBR Q
487 CALL zungbr( 'Q', m, m, m, a, n, cdum(1),
488 $ cdum(1), -1, ierr )
489 lwork_zungbr_q = int( cdum(1) )
490 IF( n.GE.mnthr ) THEN
491 IF( wntvn ) THEN
492*
493* Path 1t(N much larger than M, JOBVT='N')
494*
495 maxwrk = m + lwork_zgelqf
496 maxwrk = max( maxwrk, 2*m+lwork_zgebrd )
497 IF( wntuo .OR. wntuas )
498 $ maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
499 minwrk = 3*m
500 ELSE IF( wntvo .AND. wntun ) THEN
501*
502* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
503*
504 wrkbl = m + lwork_zgelqf
505 wrkbl = max( wrkbl, m+lwork_zunglq_m )
506 wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
507 wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
508 maxwrk = max( m*m+wrkbl, m*m+m*n )
509 minwrk = 2*m + n
510 ELSE IF( wntvo .AND. wntuas ) THEN
511*
512* Path 3t(N much larger than M, JOBU='S' or 'A',
513* JOBVT='O')
514*
515 wrkbl = m + lwork_zgelqf
516 wrkbl = max( wrkbl, m+lwork_zunglq_m )
517 wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
518 wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
519 wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
520 maxwrk = max( m*m+wrkbl, m*m+m*n )
521 minwrk = 2*m + n
522 ELSE IF( wntvs .AND. wntun ) THEN
523*
524* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
525*
526 wrkbl = m + lwork_zgelqf
527 wrkbl = max( wrkbl, m+lwork_zunglq_m )
528 wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
529 wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
530 maxwrk = m*m + wrkbl
531 minwrk = 2*m + n
532 ELSE IF( wntvs .AND. wntuo ) THEN
533*
534* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
535*
536 wrkbl = m + lwork_zgelqf
537 wrkbl = max( wrkbl, m+lwork_zunglq_m )
538 wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
539 wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
540 wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
541 maxwrk = 2*m*m + wrkbl
542 minwrk = 2*m + n
543 ELSE IF( wntvs .AND. wntuas ) THEN
544*
545* Path 6t(N much larger than M, JOBU='S' or 'A',
546* JOBVT='S')
547*
548 wrkbl = m + lwork_zgelqf
549 wrkbl = max( wrkbl, m+lwork_zunglq_m )
550 wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
551 wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
552 wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
553 maxwrk = m*m + wrkbl
554 minwrk = 2*m + n
555 ELSE IF( wntva .AND. wntun ) THEN
556*
557* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
558*
559 wrkbl = m + lwork_zgelqf
560 wrkbl = max( wrkbl, m+lwork_zunglq_n )
561 wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
562 wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
563 maxwrk = m*m + wrkbl
564 minwrk = 2*m + n
565 ELSE IF( wntva .AND. wntuo ) THEN
566*
567* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
568*
569 wrkbl = m + lwork_zgelqf
570 wrkbl = max( wrkbl, m+lwork_zunglq_n )
571 wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
572 wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
573 wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
574 maxwrk = 2*m*m + wrkbl
575 minwrk = 2*m + n
576 ELSE IF( wntva .AND. wntuas ) THEN
577*
578* Path 9t(N much larger than M, JOBU='S' or 'A',
579* JOBVT='A')
580*
581 wrkbl = m + lwork_zgelqf
582 wrkbl = max( wrkbl, m+lwork_zunglq_n )
583 wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
584 wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
585 wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
586 maxwrk = m*m + wrkbl
587 minwrk = 2*m + n
588 END IF
589 ELSE
590*
591* Path 10t(N greater than M, but not much larger)
592*
593 CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
594 $ cdum(1), cdum(1), -1, ierr )
595 lwork_zgebrd = int( cdum(1) )
596 maxwrk = 2*m + lwork_zgebrd
597 IF( wntvs .OR. wntvo ) THEN
598* Compute space needed for ZUNGBR P
599 CALL zungbr( 'P', m, n, m, a, n, cdum(1),
600 $ cdum(1), -1, ierr )
601 lwork_zungbr_p = int( cdum(1) )
602 maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
603 END IF
604 IF( wntva ) THEN
605 CALL zungbr( 'P', n, n, m, a, n, cdum(1),
606 $ cdum(1), -1, ierr )
607 lwork_zungbr_p = int( cdum(1) )
608 maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
609 END IF
610 IF( .NOT.wntun ) THEN
611 maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
612 END IF
613 minwrk = 2*m + n
614 END IF
615 END IF
616 maxwrk = max( maxwrk, minwrk )
617 work( 1 ) = maxwrk
618*
619 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
620 info = -13
621 END IF
622 END IF
623*
624 IF( info.NE.0 ) THEN
625 CALL xerbla( 'ZGESVD', -info )
626 RETURN
627 ELSE IF( lquery ) THEN
628 RETURN
629 END IF
630*
631* Quick return if possible
632*
633 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
634 RETURN
635 END IF
636*
637* Get machine constants
638*
639 eps = dlamch( 'P' )
640 smlnum = sqrt( dlamch( 'S' ) ) / eps
641 bignum = one / smlnum
642*
643* Scale A if max element outside range [SMLNUM,BIGNUM]
644*
645 anrm = zlange( 'M', m, n, a, lda, dum )
646 iscl = 0
647 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
648 iscl = 1
649 CALL zlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
650 ELSE IF( anrm.GT.bignum ) THEN
651 iscl = 1
652 CALL zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
653 END IF
654*
655 IF( m.GE.n ) THEN
656*
657* A has at least as many rows as columns. If A has sufficiently
658* more rows than columns, first reduce using the QR
659* decomposition (if sufficient workspace available)
660*
661 IF( m.GE.mnthr ) THEN
662*
663 IF( wntun ) THEN
664*
665* Path 1 (M much larger than N, JOBU='N')
666* No left singular vectors to be computed
667*
668 itau = 1
669 iwork = itau + n
670*
671* Compute A=Q*R
672* (CWorkspace: need 2*N, prefer N+N*NB)
673* (RWorkspace: need 0)
674*
675 CALL zgeqrf( m, n, a, lda, work( itau ), work( iwork ),
676 $ lwork-iwork+1, ierr )
677*
678* Zero out below R
679*
680 IF( n .GT. 1 ) THEN
681 CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
682 $ lda )
683 END IF
684 ie = 1
685 itauq = 1
686 itaup = itauq + n
687 iwork = itaup + n
688*
689* Bidiagonalize R in A
690* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
691* (RWorkspace: need N)
692*
693 CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
694 $ work( itaup ), work( iwork ), lwork-iwork+1,
695 $ ierr )
696 ncvt = 0
697 IF( wntvo .OR. wntvas ) THEN
698*
699* If right singular vectors desired, generate P'.
700* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
701* (RWorkspace: 0)
702*
703 CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
704 $ work( iwork ), lwork-iwork+1, ierr )
705 ncvt = n
706 END IF
707 irwork = ie + n
708*
709* Perform bidiagonal QR iteration, computing right
710* singular vectors of A in A if desired
711* (CWorkspace: 0)
712* (RWorkspace: need BDSPAC)
713*
714 CALL zbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
715 $ cdum, 1, cdum, 1, rwork( irwork ), info )
716*
717* If right singular vectors desired in VT, copy them there
718*
719 IF( wntvas )
720 $ CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
721*
722 ELSE IF( wntuo .AND. wntvn ) THEN
723*
724* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
725* N left singular vectors to be overwritten on A and
726* no right singular vectors to be computed
727*
728 IF( lwork.GE.n*n+3*n ) THEN
729*
730* Sufficient workspace for a fast algorithm
731*
732 ir = 1
733 IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
734*
735* WORK(IU) is LDA by N, WORK(IR) is LDA by N
736*
737 ldwrku = lda
738 ldwrkr = lda
739 ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
740*
741* WORK(IU) is LDA by N, WORK(IR) is N by N
742*
743 ldwrku = lda
744 ldwrkr = n
745 ELSE
746*
747* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
748*
749 ldwrku = ( lwork-n*n ) / n
750 ldwrkr = n
751 END IF
752 itau = ir + ldwrkr*n
753 iwork = itau + n
754*
755* Compute A=Q*R
756* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
757* (RWorkspace: 0)
758*
759 CALL zgeqrf( m, n, a, lda, work( itau ),
760 $ work( iwork ), lwork-iwork+1, ierr )
761*
762* Copy R to WORK(IR) and zero out below it
763*
764 CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
765 CALL zlaset( 'L', n-1, n-1, czero, czero,
766 $ work( ir+1 ), ldwrkr )
767*
768* Generate Q in A
769* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
770* (RWorkspace: 0)
771*
772 CALL zungqr( m, n, n, a, lda, work( itau ),
773 $ work( iwork ), lwork-iwork+1, ierr )
774 ie = 1
775 itauq = itau
776 itaup = itauq + n
777 iwork = itaup + n
778*
779* Bidiagonalize R in WORK(IR)
780* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
781* (RWorkspace: need N)
782*
783 CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
784 $ work( itauq ), work( itaup ),
785 $ work( iwork ), lwork-iwork+1, ierr )
786*
787* Generate left vectors bidiagonalizing R
788* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
789* (RWorkspace: need 0)
790*
791 CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
792 $ work( itauq ), work( iwork ),
793 $ lwork-iwork+1, ierr )
794 irwork = ie + n
795*
796* Perform bidiagonal QR iteration, computing left
797* singular vectors of R in WORK(IR)
798* (CWorkspace: need N*N)
799* (RWorkspace: need BDSPAC)
800*
801 CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
802 $ work( ir ), ldwrkr, cdum, 1,
803 $ rwork( irwork ), info )
804 iu = itauq
805*
806* Multiply Q in A by left singular vectors of R in
807* WORK(IR), storing result in WORK(IU) and copying to A
808* (CWorkspace: need N*N+N, prefer N*N+M*N)
809* (RWorkspace: 0)
810*
811 DO 10 i = 1, m, ldwrku
812 chunk = min( m-i+1, ldwrku )
813 CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
814 $ lda, work( ir ), ldwrkr, czero,
815 $ work( iu ), ldwrku )
816 CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
817 $ a( i, 1 ), lda )
818 10 CONTINUE
819*
820 ELSE
821*
822* Insufficient workspace for a fast algorithm
823*
824 ie = 1
825 itauq = 1
826 itaup = itauq + n
827 iwork = itaup + n
828*
829* Bidiagonalize A
830* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
831* (RWorkspace: N)
832*
833 CALL zgebrd( m, n, a, lda, s, rwork( ie ),
834 $ work( itauq ), work( itaup ),
835 $ work( iwork ), lwork-iwork+1, ierr )
836*
837* Generate left vectors bidiagonalizing A
838* (CWorkspace: need 3*N, prefer 2*N+N*NB)
839* (RWorkspace: 0)
840*
841 CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
842 $ work( iwork ), lwork-iwork+1, ierr )
843 irwork = ie + n
844*
845* Perform bidiagonal QR iteration, computing left
846* singular vectors of A in A
847* (CWorkspace: need 0)
848* (RWorkspace: need BDSPAC)
849*
850 CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
851 $ a, lda, cdum, 1, rwork( irwork ), info )
852*
853 END IF
854*
855 ELSE IF( wntuo .AND. wntvas ) THEN
856*
857* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858* N left singular vectors to be overwritten on A and
859* N right singular vectors to be computed in VT
860*
861 IF( lwork.GE.n*n+3*n ) THEN
862*
863* Sufficient workspace for a fast algorithm
864*
865 ir = 1
866 IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
867*
868* WORK(IU) is LDA by N and WORK(IR) is LDA by N
869*
870 ldwrku = lda
871 ldwrkr = lda
872 ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
873*
874* WORK(IU) is LDA by N and WORK(IR) is N by N
875*
876 ldwrku = lda
877 ldwrkr = n
878 ELSE
879*
880* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881*
882 ldwrku = ( lwork-n*n ) / n
883 ldwrkr = n
884 END IF
885 itau = ir + ldwrkr*n
886 iwork = itau + n
887*
888* Compute A=Q*R
889* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
890* (RWorkspace: 0)
891*
892 CALL zgeqrf( m, n, a, lda, work( itau ),
893 $ work( iwork ), lwork-iwork+1, ierr )
894*
895* Copy R to VT, zeroing out below it
896*
897 CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
898 IF( n.GT.1 )
899 $ CALL zlaset( 'L', n-1, n-1, czero, czero,
900 $ vt( 2, 1 ), ldvt )
901*
902* Generate Q in A
903* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
904* (RWorkspace: 0)
905*
906 CALL zungqr( m, n, n, a, lda, work( itau ),
907 $ work( iwork ), lwork-iwork+1, ierr )
908 ie = 1
909 itauq = itau
910 itaup = itauq + n
911 iwork = itaup + n
912*
913* Bidiagonalize R in VT, copying result to WORK(IR)
914* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
915* (RWorkspace: need N)
916*
917 CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
918 $ work( itauq ), work( itaup ),
919 $ work( iwork ), lwork-iwork+1, ierr )
920 CALL zlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
921*
922* Generate left vectors bidiagonalizing R in WORK(IR)
923* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
924* (RWorkspace: 0)
925*
926 CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
927 $ work( itauq ), work( iwork ),
928 $ lwork-iwork+1, ierr )
929*
930* Generate right vectors bidiagonalizing R in VT
931* (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
932* (RWorkspace: 0)
933*
934 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
935 $ work( iwork ), lwork-iwork+1, ierr )
936 irwork = ie + n
937*
938* Perform bidiagonal QR iteration, computing left
939* singular vectors of R in WORK(IR) and computing right
940* singular vectors of R in VT
941* (CWorkspace: need N*N)
942* (RWorkspace: need BDSPAC)
943*
944 CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
945 $ ldvt, work( ir ), ldwrkr, cdum, 1,
946 $ rwork( irwork ), info )
947 iu = itauq
948*
949* Multiply Q in A by left singular vectors of R in
950* WORK(IR), storing result in WORK(IU) and copying to A
951* (CWorkspace: need N*N+N, prefer N*N+M*N)
952* (RWorkspace: 0)
953*
954 DO 20 i = 1, m, ldwrku
955 chunk = min( m-i+1, ldwrku )
956 CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
957 $ lda, work( ir ), ldwrkr, czero,
958 $ work( iu ), ldwrku )
959 CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
960 $ a( i, 1 ), lda )
961 20 CONTINUE
962*
963 ELSE
964*
965* Insufficient workspace for a fast algorithm
966*
967 itau = 1
968 iwork = itau + n
969*
970* Compute A=Q*R
971* (CWorkspace: need 2*N, prefer N+N*NB)
972* (RWorkspace: 0)
973*
974 CALL zgeqrf( m, n, a, lda, work( itau ),
975 $ work( iwork ), lwork-iwork+1, ierr )
976*
977* Copy R to VT, zeroing out below it
978*
979 CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
980 IF( n.GT.1 )
981 $ CALL zlaset( 'L', n-1, n-1, czero, czero,
982 $ vt( 2, 1 ), ldvt )
983*
984* Generate Q in A
985* (CWorkspace: need 2*N, prefer N+N*NB)
986* (RWorkspace: 0)
987*
988 CALL zungqr( m, n, n, a, lda, work( itau ),
989 $ work( iwork ), lwork-iwork+1, ierr )
990 ie = 1
991 itauq = itau
992 itaup = itauq + n
993 iwork = itaup + n
994*
995* Bidiagonalize R in VT
996* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
997* (RWorkspace: N)
998*
999 CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1000 $ work( itauq ), work( itaup ),
1001 $ work( iwork ), lwork-iwork+1, ierr )
1002*
1003* Multiply Q in A by left vectors bidiagonalizing R
1004* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1005* (RWorkspace: 0)
1006*
1007 CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1008 $ work( itauq ), a, lda, work( iwork ),
1009 $ lwork-iwork+1, ierr )
1010*
1011* Generate right vectors bidiagonalizing R in VT
1012* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1013* (RWorkspace: 0)
1014*
1015 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1016 $ work( iwork ), lwork-iwork+1, ierr )
1017 irwork = ie + n
1018*
1019* Perform bidiagonal QR iteration, computing left
1020* singular vectors of A in A and computing right
1021* singular vectors of A in VT
1022* (CWorkspace: 0)
1023* (RWorkspace: need BDSPAC)
1024*
1025 CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1026 $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1027 $ info )
1028*
1029 END IF
1030*
1031 ELSE IF( wntus ) THEN
1032*
1033 IF( wntvn ) THEN
1034*
1035* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1036* N left singular vectors to be computed in U and
1037* no right singular vectors to be computed
1038*
1039 IF( lwork.GE.n*n+3*n ) THEN
1040*
1041* Sufficient workspace for a fast algorithm
1042*
1043 ir = 1
1044 IF( lwork.GE.wrkbl+lda*n ) THEN
1045*
1046* WORK(IR) is LDA by N
1047*
1048 ldwrkr = lda
1049 ELSE
1050*
1051* WORK(IR) is N by N
1052*
1053 ldwrkr = n
1054 END IF
1055 itau = ir + ldwrkr*n
1056 iwork = itau + n
1057*
1058* Compute A=Q*R
1059* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1060* (RWorkspace: 0)
1061*
1062 CALL zgeqrf( m, n, a, lda, work( itau ),
1063 $ work( iwork ), lwork-iwork+1, ierr )
1064*
1065* Copy R to WORK(IR), zeroing out below it
1066*
1067 CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1068 $ ldwrkr )
1069 CALL zlaset( 'L', n-1, n-1, czero, czero,
1070 $ work( ir+1 ), ldwrkr )
1071*
1072* Generate Q in A
1073* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1074* (RWorkspace: 0)
1075*
1076 CALL zungqr( m, n, n, a, lda, work( itau ),
1077 $ work( iwork ), lwork-iwork+1, ierr )
1078 ie = 1
1079 itauq = itau
1080 itaup = itauq + n
1081 iwork = itaup + n
1082*
1083* Bidiagonalize R in WORK(IR)
1084* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1085* (RWorkspace: need N)
1086*
1087 CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1088 $ rwork( ie ), work( itauq ),
1089 $ work( itaup ), work( iwork ),
1090 $ lwork-iwork+1, ierr )
1091*
1092* Generate left vectors bidiagonalizing R in WORK(IR)
1093* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1094* (RWorkspace: 0)
1095*
1096 CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1097 $ work( itauq ), work( iwork ),
1098 $ lwork-iwork+1, ierr )
1099 irwork = ie + n
1100*
1101* Perform bidiagonal QR iteration, computing left
1102* singular vectors of R in WORK(IR)
1103* (CWorkspace: need N*N)
1104* (RWorkspace: need BDSPAC)
1105*
1106 CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1107 $ 1, work( ir ), ldwrkr, cdum, 1,
1108 $ rwork( irwork ), info )
1109*
1110* Multiply Q in A by left singular vectors of R in
1111* WORK(IR), storing result in U
1112* (CWorkspace: need N*N)
1113* (RWorkspace: 0)
1114*
1115 CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1116 $ work( ir ), ldwrkr, czero, u, ldu )
1117*
1118 ELSE
1119*
1120* Insufficient workspace for a fast algorithm
1121*
1122 itau = 1
1123 iwork = itau + n
1124*
1125* Compute A=Q*R, copying result to U
1126* (CWorkspace: need 2*N, prefer N+N*NB)
1127* (RWorkspace: 0)
1128*
1129 CALL zgeqrf( m, n, a, lda, work( itau ),
1130 $ work( iwork ), lwork-iwork+1, ierr )
1131 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1132*
1133* Generate Q in U
1134* (CWorkspace: need 2*N, prefer N+N*NB)
1135* (RWorkspace: 0)
1136*
1137 CALL zungqr( m, n, n, u, ldu, work( itau ),
1138 $ work( iwork ), lwork-iwork+1, ierr )
1139 ie = 1
1140 itauq = itau
1141 itaup = itauq + n
1142 iwork = itaup + n
1143*
1144* Zero out below R in A
1145*
1146 IF( n .GT. 1 ) THEN
1147 CALL zlaset( 'L', n-1, n-1, czero, czero,
1148 $ a( 2, 1 ), lda )
1149 END IF
1150*
1151* Bidiagonalize R in A
1152* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1153* (RWorkspace: need N)
1154*
1155 CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1156 $ work( itauq ), work( itaup ),
1157 $ work( iwork ), lwork-iwork+1, ierr )
1158*
1159* Multiply Q in U by left vectors bidiagonalizing R
1160* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1161* (RWorkspace: 0)
1162*
1163 CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1164 $ work( itauq ), u, ldu, work( iwork ),
1165 $ lwork-iwork+1, ierr )
1166 irwork = ie + n
1167*
1168* Perform bidiagonal QR iteration, computing left
1169* singular vectors of A in U
1170* (CWorkspace: 0)
1171* (RWorkspace: need BDSPAC)
1172*
1173 CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1174 $ 1, u, ldu, cdum, 1, rwork( irwork ),
1175 $ info )
1176*
1177 END IF
1178*
1179 ELSE IF( wntvo ) THEN
1180*
1181* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1182* N left singular vectors to be computed in U and
1183* N right singular vectors to be overwritten on A
1184*
1185 IF( lwork.GE.2*n*n+3*n ) THEN
1186*
1187* Sufficient workspace for a fast algorithm
1188*
1189 iu = 1
1190 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1191*
1192* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1193*
1194 ldwrku = lda
1195 ir = iu + ldwrku*n
1196 ldwrkr = lda
1197 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1198*
1199* WORK(IU) is LDA by N and WORK(IR) is N by N
1200*
1201 ldwrku = lda
1202 ir = iu + ldwrku*n
1203 ldwrkr = n
1204 ELSE
1205*
1206* WORK(IU) is N by N and WORK(IR) is N by N
1207*
1208 ldwrku = n
1209 ir = iu + ldwrku*n
1210 ldwrkr = n
1211 END IF
1212 itau = ir + ldwrkr*n
1213 iwork = itau + n
1214*
1215* Compute A=Q*R
1216* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1217* (RWorkspace: 0)
1218*
1219 CALL zgeqrf( m, n, a, lda, work( itau ),
1220 $ work( iwork ), lwork-iwork+1, ierr )
1221*
1222* Copy R to WORK(IU), zeroing out below it
1223*
1224 CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1225 $ ldwrku )
1226 CALL zlaset( 'L', n-1, n-1, czero, czero,
1227 $ work( iu+1 ), ldwrku )
1228*
1229* Generate Q in A
1230* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1231* (RWorkspace: 0)
1232*
1233 CALL zungqr( m, n, n, a, lda, work( itau ),
1234 $ work( iwork ), lwork-iwork+1, ierr )
1235 ie = 1
1236 itauq = itau
1237 itaup = itauq + n
1238 iwork = itaup + n
1239*
1240* Bidiagonalize R in WORK(IU), copying result to
1241* WORK(IR)
1242* (CWorkspace: need 2*N*N+3*N,
1243* prefer 2*N*N+2*N+2*N*NB)
1244* (RWorkspace: need N)
1245*
1246 CALL zgebrd( n, n, work( iu ), ldwrku, s,
1247 $ rwork( ie ), work( itauq ),
1248 $ work( itaup ), work( iwork ),
1249 $ lwork-iwork+1, ierr )
1250 CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1251 $ work( ir ), ldwrkr )
1252*
1253* Generate left bidiagonalizing vectors in WORK(IU)
1254* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1255* (RWorkspace: 0)
1256*
1257 CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1258 $ work( itauq ), work( iwork ),
1259 $ lwork-iwork+1, ierr )
1260*
1261* Generate right bidiagonalizing vectors in WORK(IR)
1262* (CWorkspace: need 2*N*N+3*N-1,
1263* prefer 2*N*N+2*N+(N-1)*NB)
1264* (RWorkspace: 0)
1265*
1266 CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1267 $ work( itaup ), work( iwork ),
1268 $ lwork-iwork+1, ierr )
1269 irwork = ie + n
1270*
1271* Perform bidiagonal QR iteration, computing left
1272* singular vectors of R in WORK(IU) and computing
1273* right singular vectors of R in WORK(IR)
1274* (CWorkspace: need 2*N*N)
1275* (RWorkspace: need BDSPAC)
1276*
1277 CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1278 $ work( ir ), ldwrkr, work( iu ),
1279 $ ldwrku, cdum, 1, rwork( irwork ),
1280 $ info )
1281*
1282* Multiply Q in A by left singular vectors of R in
1283* WORK(IU), storing result in U
1284* (CWorkspace: need N*N)
1285* (RWorkspace: 0)
1286*
1287 CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1288 $ work( iu ), ldwrku, czero, u, ldu )
1289*
1290* Copy right singular vectors of R to A
1291* (CWorkspace: need N*N)
1292* (RWorkspace: 0)
1293*
1294 CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1295 $ lda )
1296*
1297 ELSE
1298*
1299* Insufficient workspace for a fast algorithm
1300*
1301 itau = 1
1302 iwork = itau + n
1303*
1304* Compute A=Q*R, copying result to U
1305* (CWorkspace: need 2*N, prefer N+N*NB)
1306* (RWorkspace: 0)
1307*
1308 CALL zgeqrf( m, n, a, lda, work( itau ),
1309 $ work( iwork ), lwork-iwork+1, ierr )
1310 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1311*
1312* Generate Q in U
1313* (CWorkspace: need 2*N, prefer N+N*NB)
1314* (RWorkspace: 0)
1315*
1316 CALL zungqr( m, n, n, u, ldu, work( itau ),
1317 $ work( iwork ), lwork-iwork+1, ierr )
1318 ie = 1
1319 itauq = itau
1320 itaup = itauq + n
1321 iwork = itaup + n
1322*
1323* Zero out below R in A
1324*
1325 IF( n .GT. 1 ) THEN
1326 CALL zlaset( 'L', n-1, n-1, czero, czero,
1327 $ a( 2, 1 ), lda )
1328 END IF
1329*
1330* Bidiagonalize R in A
1331* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1332* (RWorkspace: need N)
1333*
1334 CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1335 $ work( itauq ), work( itaup ),
1336 $ work( iwork ), lwork-iwork+1, ierr )
1337*
1338* Multiply Q in U by left vectors bidiagonalizing R
1339* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1340* (RWorkspace: 0)
1341*
1342 CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1343 $ work( itauq ), u, ldu, work( iwork ),
1344 $ lwork-iwork+1, ierr )
1345*
1346* Generate right vectors bidiagonalizing R in A
1347* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1348* (RWorkspace: 0)
1349*
1350 CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1351 $ work( iwork ), lwork-iwork+1, ierr )
1352 irwork = ie + n
1353*
1354* Perform bidiagonal QR iteration, computing left
1355* singular vectors of A in U and computing right
1356* singular vectors of A in A
1357* (CWorkspace: 0)
1358* (RWorkspace: need BDSPAC)
1359*
1360 CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1361 $ lda, u, ldu, cdum, 1, rwork( irwork ),
1362 $ info )
1363*
1364 END IF
1365*
1366 ELSE IF( wntvas ) THEN
1367*
1368* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1369* or 'A')
1370* N left singular vectors to be computed in U and
1371* N right singular vectors to be computed in VT
1372*
1373 IF( lwork.GE.n*n+3*n ) THEN
1374*
1375* Sufficient workspace for a fast algorithm
1376*
1377 iu = 1
1378 IF( lwork.GE.wrkbl+lda*n ) THEN
1379*
1380* WORK(IU) is LDA by N
1381*
1382 ldwrku = lda
1383 ELSE
1384*
1385* WORK(IU) is N by N
1386*
1387 ldwrku = n
1388 END IF
1389 itau = iu + ldwrku*n
1390 iwork = itau + n
1391*
1392* Compute A=Q*R
1393* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1394* (RWorkspace: 0)
1395*
1396 CALL zgeqrf( m, n, a, lda, work( itau ),
1397 $ work( iwork ), lwork-iwork+1, ierr )
1398*
1399* Copy R to WORK(IU), zeroing out below it
1400*
1401 CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1402 $ ldwrku )
1403 CALL zlaset( 'L', n-1, n-1, czero, czero,
1404 $ work( iu+1 ), ldwrku )
1405*
1406* Generate Q in A
1407* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1408* (RWorkspace: 0)
1409*
1410 CALL zungqr( m, n, n, a, lda, work( itau ),
1411 $ work( iwork ), lwork-iwork+1, ierr )
1412 ie = 1
1413 itauq = itau
1414 itaup = itauq + n
1415 iwork = itaup + n
1416*
1417* Bidiagonalize R in WORK(IU), copying result to VT
1418* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1419* (RWorkspace: need N)
1420*
1421 CALL zgebrd( n, n, work( iu ), ldwrku, s,
1422 $ rwork( ie ), work( itauq ),
1423 $ work( itaup ), work( iwork ),
1424 $ lwork-iwork+1, ierr )
1425 CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1426 $ ldvt )
1427*
1428* Generate left bidiagonalizing vectors in WORK(IU)
1429* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1430* (RWorkspace: 0)
1431*
1432 CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1433 $ work( itauq ), work( iwork ),
1434 $ lwork-iwork+1, ierr )
1435*
1436* Generate right bidiagonalizing vectors in VT
1437* (CWorkspace: need N*N+3*N-1,
1438* prefer N*N+2*N+(N-1)*NB)
1439* (RWorkspace: 0)
1440*
1441 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1442 $ work( iwork ), lwork-iwork+1, ierr )
1443 irwork = ie + n
1444*
1445* Perform bidiagonal QR iteration, computing left
1446* singular vectors of R in WORK(IU) and computing
1447* right singular vectors of R in VT
1448* (CWorkspace: need N*N)
1449* (RWorkspace: need BDSPAC)
1450*
1451 CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1452 $ ldvt, work( iu ), ldwrku, cdum, 1,
1453 $ rwork( irwork ), info )
1454*
1455* Multiply Q in A by left singular vectors of R in
1456* WORK(IU), storing result in U
1457* (CWorkspace: need N*N)
1458* (RWorkspace: 0)
1459*
1460 CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1461 $ work( iu ), ldwrku, czero, u, ldu )
1462*
1463 ELSE
1464*
1465* Insufficient workspace for a fast algorithm
1466*
1467 itau = 1
1468 iwork = itau + n
1469*
1470* Compute A=Q*R, copying result to U
1471* (CWorkspace: need 2*N, prefer N+N*NB)
1472* (RWorkspace: 0)
1473*
1474 CALL zgeqrf( m, n, a, lda, work( itau ),
1475 $ work( iwork ), lwork-iwork+1, ierr )
1476 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1477*
1478* Generate Q in U
1479* (CWorkspace: need 2*N, prefer N+N*NB)
1480* (RWorkspace: 0)
1481*
1482 CALL zungqr( m, n, n, u, ldu, work( itau ),
1483 $ work( iwork ), lwork-iwork+1, ierr )
1484*
1485* Copy R to VT, zeroing out below it
1486*
1487 CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1488 IF( n.GT.1 )
1489 $ CALL zlaset( 'L', n-1, n-1, czero, czero,
1490 $ vt( 2, 1 ), ldvt )
1491 ie = 1
1492 itauq = itau
1493 itaup = itauq + n
1494 iwork = itaup + n
1495*
1496* Bidiagonalize R in VT
1497* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1498* (RWorkspace: need N)
1499*
1500 CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1501 $ work( itauq ), work( itaup ),
1502 $ work( iwork ), lwork-iwork+1, ierr )
1503*
1504* Multiply Q in U by left bidiagonalizing vectors
1505* in VT
1506* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1507* (RWorkspace: 0)
1508*
1509 CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1510 $ work( itauq ), u, ldu, work( iwork ),
1511 $ lwork-iwork+1, ierr )
1512*
1513* Generate right bidiagonalizing vectors in VT
1514* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1515* (RWorkspace: 0)
1516*
1517 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1518 $ work( iwork ), lwork-iwork+1, ierr )
1519 irwork = ie + n
1520*
1521* Perform bidiagonal QR iteration, computing left
1522* singular vectors of A in U and computing right
1523* singular vectors of A in VT
1524* (CWorkspace: 0)
1525* (RWorkspace: need BDSPAC)
1526*
1527 CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1528 $ ldvt, u, ldu, cdum, 1,
1529 $ rwork( irwork ), info )
1530*
1531 END IF
1532*
1533 END IF
1534*
1535 ELSE IF( wntua ) THEN
1536*
1537 IF( wntvn ) THEN
1538*
1539* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1540* M left singular vectors to be computed in U and
1541* no right singular vectors to be computed
1542*
1543 IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1544*
1545* Sufficient workspace for a fast algorithm
1546*
1547 ir = 1
1548 IF( lwork.GE.wrkbl+lda*n ) THEN
1549*
1550* WORK(IR) is LDA by N
1551*
1552 ldwrkr = lda
1553 ELSE
1554*
1555* WORK(IR) is N by N
1556*
1557 ldwrkr = n
1558 END IF
1559 itau = ir + ldwrkr*n
1560 iwork = itau + n
1561*
1562* Compute A=Q*R, copying result to U
1563* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1564* (RWorkspace: 0)
1565*
1566 CALL zgeqrf( m, n, a, lda, work( itau ),
1567 $ work( iwork ), lwork-iwork+1, ierr )
1568 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1569*
1570* Copy R to WORK(IR), zeroing out below it
1571*
1572 CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1573 $ ldwrkr )
1574 CALL zlaset( 'L', n-1, n-1, czero, czero,
1575 $ work( ir+1 ), ldwrkr )
1576*
1577* Generate Q in U
1578* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1579* (RWorkspace: 0)
1580*
1581 CALL zungqr( m, m, n, u, ldu, work( itau ),
1582 $ work( iwork ), lwork-iwork+1, ierr )
1583 ie = 1
1584 itauq = itau
1585 itaup = itauq + n
1586 iwork = itaup + n
1587*
1588* Bidiagonalize R in WORK(IR)
1589* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1590* (RWorkspace: need N)
1591*
1592 CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1593 $ rwork( ie ), work( itauq ),
1594 $ work( itaup ), work( iwork ),
1595 $ lwork-iwork+1, ierr )
1596*
1597* Generate left bidiagonalizing vectors in WORK(IR)
1598* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1599* (RWorkspace: 0)
1600*
1601 CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1602 $ work( itauq ), work( iwork ),
1603 $ lwork-iwork+1, ierr )
1604 irwork = ie + n
1605*
1606* Perform bidiagonal QR iteration, computing left
1607* singular vectors of R in WORK(IR)
1608* (CWorkspace: need N*N)
1609* (RWorkspace: need BDSPAC)
1610*
1611 CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1612 $ 1, work( ir ), ldwrkr, cdum, 1,
1613 $ rwork( irwork ), info )
1614*
1615* Multiply Q in U by left singular vectors of R in
1616* WORK(IR), storing result in A
1617* (CWorkspace: need N*N)
1618* (RWorkspace: 0)
1619*
1620 CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1621 $ work( ir ), ldwrkr, czero, a, lda )
1622*
1623* Copy left singular vectors of A from A to U
1624*
1625 CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1626*
1627 ELSE
1628*
1629* Insufficient workspace for a fast algorithm
1630*
1631 itau = 1
1632 iwork = itau + n
1633*
1634* Compute A=Q*R, copying result to U
1635* (CWorkspace: need 2*N, prefer N+N*NB)
1636* (RWorkspace: 0)
1637*
1638 CALL zgeqrf( m, n, a, lda, work( itau ),
1639 $ work( iwork ), lwork-iwork+1, ierr )
1640 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1641*
1642* Generate Q in U
1643* (CWorkspace: need N+M, prefer N+M*NB)
1644* (RWorkspace: 0)
1645*
1646 CALL zungqr( m, m, n, u, ldu, work( itau ),
1647 $ work( iwork ), lwork-iwork+1, ierr )
1648 ie = 1
1649 itauq = itau
1650 itaup = itauq + n
1651 iwork = itaup + n
1652*
1653* Zero out below R in A
1654*
1655 IF( n .GT. 1 ) THEN
1656 CALL zlaset( 'L', n-1, n-1, czero, czero,
1657 $ a( 2, 1 ), lda )
1658 END IF
1659*
1660* Bidiagonalize R in A
1661* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1662* (RWorkspace: need N)
1663*
1664 CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1665 $ work( itauq ), work( itaup ),
1666 $ work( iwork ), lwork-iwork+1, ierr )
1667*
1668* Multiply Q in U by left bidiagonalizing vectors
1669* in A
1670* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1671* (RWorkspace: 0)
1672*
1673 CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1674 $ work( itauq ), u, ldu, work( iwork ),
1675 $ lwork-iwork+1, ierr )
1676 irwork = ie + n
1677*
1678* Perform bidiagonal QR iteration, computing left
1679* singular vectors of A in U
1680* (CWorkspace: 0)
1681* (RWorkspace: need BDSPAC)
1682*
1683 CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1684 $ 1, u, ldu, cdum, 1, rwork( irwork ),
1685 $ info )
1686*
1687 END IF
1688*
1689 ELSE IF( wntvo ) THEN
1690*
1691* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1692* M left singular vectors to be computed in U and
1693* N right singular vectors to be overwritten on A
1694*
1695 IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1696*
1697* Sufficient workspace for a fast algorithm
1698*
1699 iu = 1
1700 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1701*
1702* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1703*
1704 ldwrku = lda
1705 ir = iu + ldwrku*n
1706 ldwrkr = lda
1707 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1708*
1709* WORK(IU) is LDA by N and WORK(IR) is N by N
1710*
1711 ldwrku = lda
1712 ir = iu + ldwrku*n
1713 ldwrkr = n
1714 ELSE
1715*
1716* WORK(IU) is N by N and WORK(IR) is N by N
1717*
1718 ldwrku = n
1719 ir = iu + ldwrku*n
1720 ldwrkr = n
1721 END IF
1722 itau = ir + ldwrkr*n
1723 iwork = itau + n
1724*
1725* Compute A=Q*R, copying result to U
1726* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1727* (RWorkspace: 0)
1728*
1729 CALL zgeqrf( m, n, a, lda, work( itau ),
1730 $ work( iwork ), lwork-iwork+1, ierr )
1731 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1732*
1733* Generate Q in U
1734* (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1735* (RWorkspace: 0)
1736*
1737 CALL zungqr( m, m, n, u, ldu, work( itau ),
1738 $ work( iwork ), lwork-iwork+1, ierr )
1739*
1740* Copy R to WORK(IU), zeroing out below it
1741*
1742 CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1743 $ ldwrku )
1744 CALL zlaset( 'L', n-1, n-1, czero, czero,
1745 $ work( iu+1 ), ldwrku )
1746 ie = 1
1747 itauq = itau
1748 itaup = itauq + n
1749 iwork = itaup + n
1750*
1751* Bidiagonalize R in WORK(IU), copying result to
1752* WORK(IR)
1753* (CWorkspace: need 2*N*N+3*N,
1754* prefer 2*N*N+2*N+2*N*NB)
1755* (RWorkspace: need N)
1756*
1757 CALL zgebrd( n, n, work( iu ), ldwrku, s,
1758 $ rwork( ie ), work( itauq ),
1759 $ work( itaup ), work( iwork ),
1760 $ lwork-iwork+1, ierr )
1761 CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1762 $ work( ir ), ldwrkr )
1763*
1764* Generate left bidiagonalizing vectors in WORK(IU)
1765* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1766* (RWorkspace: 0)
1767*
1768 CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1769 $ work( itauq ), work( iwork ),
1770 $ lwork-iwork+1, ierr )
1771*
1772* Generate right bidiagonalizing vectors in WORK(IR)
1773* (CWorkspace: need 2*N*N+3*N-1,
1774* prefer 2*N*N+2*N+(N-1)*NB)
1775* (RWorkspace: 0)
1776*
1777 CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1778 $ work( itaup ), work( iwork ),
1779 $ lwork-iwork+1, ierr )
1780 irwork = ie + n
1781*
1782* Perform bidiagonal QR iteration, computing left
1783* singular vectors of R in WORK(IU) and computing
1784* right singular vectors of R in WORK(IR)
1785* (CWorkspace: need 2*N*N)
1786* (RWorkspace: need BDSPAC)
1787*
1788 CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1789 $ work( ir ), ldwrkr, work( iu ),
1790 $ ldwrku, cdum, 1, rwork( irwork ),
1791 $ info )
1792*
1793* Multiply Q in U by left singular vectors of R in
1794* WORK(IU), storing result in A
1795* (CWorkspace: need N*N)
1796* (RWorkspace: 0)
1797*
1798 CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1799 $ work( iu ), ldwrku, czero, a, lda )
1800*
1801* Copy left singular vectors of A from A to U
1802*
1803 CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1804*
1805* Copy right singular vectors of R from WORK(IR) to A
1806*
1807 CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1808 $ lda )
1809*
1810 ELSE
1811*
1812* Insufficient workspace for a fast algorithm
1813*
1814 itau = 1
1815 iwork = itau + n
1816*
1817* Compute A=Q*R, copying result to U
1818* (CWorkspace: need 2*N, prefer N+N*NB)
1819* (RWorkspace: 0)
1820*
1821 CALL zgeqrf( m, n, a, lda, work( itau ),
1822 $ work( iwork ), lwork-iwork+1, ierr )
1823 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1824*
1825* Generate Q in U
1826* (CWorkspace: need N+M, prefer N+M*NB)
1827* (RWorkspace: 0)
1828*
1829 CALL zungqr( m, m, n, u, ldu, work( itau ),
1830 $ work( iwork ), lwork-iwork+1, ierr )
1831 ie = 1
1832 itauq = itau
1833 itaup = itauq + n
1834 iwork = itaup + n
1835*
1836* Zero out below R in A
1837*
1838 IF( n .GT. 1 ) THEN
1839 CALL zlaset( 'L', n-1, n-1, czero, czero,
1840 $ a( 2, 1 ), lda )
1841 END IF
1842*
1843* Bidiagonalize R in A
1844* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1845* (RWorkspace: need N)
1846*
1847 CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1848 $ work( itauq ), work( itaup ),
1849 $ work( iwork ), lwork-iwork+1, ierr )
1850*
1851* Multiply Q in U by left bidiagonalizing vectors
1852* in A
1853* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1854* (RWorkspace: 0)
1855*
1856 CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1857 $ work( itauq ), u, ldu, work( iwork ),
1858 $ lwork-iwork+1, ierr )
1859*
1860* Generate right bidiagonalizing vectors in A
1861* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1862* (RWorkspace: 0)
1863*
1864 CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1865 $ work( iwork ), lwork-iwork+1, ierr )
1866 irwork = ie + n
1867*
1868* Perform bidiagonal QR iteration, computing left
1869* singular vectors of A in U and computing right
1870* singular vectors of A in A
1871* (CWorkspace: 0)
1872* (RWorkspace: need BDSPAC)
1873*
1874 CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1875 $ lda, u, ldu, cdum, 1, rwork( irwork ),
1876 $ info )
1877*
1878 END IF
1879*
1880 ELSE IF( wntvas ) THEN
1881*
1882* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1883* or 'A')
1884* M left singular vectors to be computed in U and
1885* N right singular vectors to be computed in VT
1886*
1887 IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1888*
1889* Sufficient workspace for a fast algorithm
1890*
1891 iu = 1
1892 IF( lwork.GE.wrkbl+lda*n ) THEN
1893*
1894* WORK(IU) is LDA by N
1895*
1896 ldwrku = lda
1897 ELSE
1898*
1899* WORK(IU) is N by N
1900*
1901 ldwrku = n
1902 END IF
1903 itau = iu + ldwrku*n
1904 iwork = itau + n
1905*
1906* Compute A=Q*R, copying result to U
1907* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1908* (RWorkspace: 0)
1909*
1910 CALL zgeqrf( m, n, a, lda, work( itau ),
1911 $ work( iwork ), lwork-iwork+1, ierr )
1912 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1913*
1914* Generate Q in U
1915* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1916* (RWorkspace: 0)
1917*
1918 CALL zungqr( m, m, n, u, ldu, work( itau ),
1919 $ work( iwork ), lwork-iwork+1, ierr )
1920*
1921* Copy R to WORK(IU), zeroing out below it
1922*
1923 CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1924 $ ldwrku )
1925 CALL zlaset( 'L', n-1, n-1, czero, czero,
1926 $ work( iu+1 ), ldwrku )
1927 ie = 1
1928 itauq = itau
1929 itaup = itauq + n
1930 iwork = itaup + n
1931*
1932* Bidiagonalize R in WORK(IU), copying result to VT
1933* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1934* (RWorkspace: need N)
1935*
1936 CALL zgebrd( n, n, work( iu ), ldwrku, s,
1937 $ rwork( ie ), work( itauq ),
1938 $ work( itaup ), work( iwork ),
1939 $ lwork-iwork+1, ierr )
1940 CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1941 $ ldvt )
1942*
1943* Generate left bidiagonalizing vectors in WORK(IU)
1944* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1945* (RWorkspace: 0)
1946*
1947 CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1948 $ work( itauq ), work( iwork ),
1949 $ lwork-iwork+1, ierr )
1950*
1951* Generate right bidiagonalizing vectors in VT
1952* (CWorkspace: need N*N+3*N-1,
1953* prefer N*N+2*N+(N-1)*NB)
1954* (RWorkspace: need 0)
1955*
1956 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1957 $ work( iwork ), lwork-iwork+1, ierr )
1958 irwork = ie + n
1959*
1960* Perform bidiagonal QR iteration, computing left
1961* singular vectors of R in WORK(IU) and computing
1962* right singular vectors of R in VT
1963* (CWorkspace: need N*N)
1964* (RWorkspace: need BDSPAC)
1965*
1966 CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1967 $ ldvt, work( iu ), ldwrku, cdum, 1,
1968 $ rwork( irwork ), info )
1969*
1970* Multiply Q in U by left singular vectors of R in
1971* WORK(IU), storing result in A
1972* (CWorkspace: need N*N)
1973* (RWorkspace: 0)
1974*
1975 CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1976 $ work( iu ), ldwrku, czero, a, lda )
1977*
1978* Copy left singular vectors of A from A to U
1979*
1980 CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1981*
1982 ELSE
1983*
1984* Insufficient workspace for a fast algorithm
1985*
1986 itau = 1
1987 iwork = itau + n
1988*
1989* Compute A=Q*R, copying result to U
1990* (CWorkspace: need 2*N, prefer N+N*NB)
1991* (RWorkspace: 0)
1992*
1993 CALL zgeqrf( m, n, a, lda, work( itau ),
1994 $ work( iwork ), lwork-iwork+1, ierr )
1995 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1996*
1997* Generate Q in U
1998* (CWorkspace: need N+M, prefer N+M*NB)
1999* (RWorkspace: 0)
2000*
2001 CALL zungqr( m, m, n, u, ldu, work( itau ),
2002 $ work( iwork ), lwork-iwork+1, ierr )
2003*
2004* Copy R from A to VT, zeroing out below it
2005*
2006 CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
2007 IF( n.GT.1 )
2008 $ CALL zlaset( 'L', n-1, n-1, czero, czero,
2009 $ vt( 2, 1 ), ldvt )
2010 ie = 1
2011 itauq = itau
2012 itaup = itauq + n
2013 iwork = itaup + n
2014*
2015* Bidiagonalize R in VT
2016* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2017* (RWorkspace: need N)
2018*
2019 CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
2020 $ work( itauq ), work( itaup ),
2021 $ work( iwork ), lwork-iwork+1, ierr )
2022*
2023* Multiply Q in U by left bidiagonalizing vectors
2024* in VT
2025* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2026* (RWorkspace: 0)
2027*
2028 CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2029 $ work( itauq ), u, ldu, work( iwork ),
2030 $ lwork-iwork+1, ierr )
2031*
2032* Generate right bidiagonalizing vectors in VT
2033* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2034* (RWorkspace: 0)
2035*
2036 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2037 $ work( iwork ), lwork-iwork+1, ierr )
2038 irwork = ie + n
2039*
2040* Perform bidiagonal QR iteration, computing left
2041* singular vectors of A in U and computing right
2042* singular vectors of A in VT
2043* (CWorkspace: 0)
2044* (RWorkspace: need BDSPAC)
2045*
2046 CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2047 $ ldvt, u, ldu, cdum, 1,
2048 $ rwork( irwork ), info )
2049*
2050 END IF
2051*
2052 END IF
2053*
2054 END IF
2055*
2056 ELSE
2057*
2058* M .LT. MNTHR
2059*
2060* Path 10 (M at least N, but not much larger)
2061* Reduce to bidiagonal form without QR decomposition
2062*
2063 ie = 1
2064 itauq = 1
2065 itaup = itauq + n
2066 iwork = itaup + n
2067*
2068* Bidiagonalize A
2069* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2070* (RWorkspace: need N)
2071*
2072 CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2073 $ work( itaup ), work( iwork ), lwork-iwork+1,
2074 $ ierr )
2075 IF( wntuas ) THEN
2076*
2077* If left singular vectors desired in U, copy result to U
2078* and generate left bidiagonalizing vectors in U
2079* (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2080* (RWorkspace: 0)
2081*
2082 CALL zlacpy( 'L', m, n, a, lda, u, ldu )
2083 IF( wntus )
2084 $ ncu = n
2085 IF( wntua )
2086 $ ncu = m
2087 CALL zungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2088 $ work( iwork ), lwork-iwork+1, ierr )
2089 END IF
2090 IF( wntvas ) THEN
2091*
2092* If right singular vectors desired in VT, copy result to
2093* VT and generate right bidiagonalizing vectors in VT
2094* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2095* (RWorkspace: 0)
2096*
2097 CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
2098 CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2099 $ work( iwork ), lwork-iwork+1, ierr )
2100 END IF
2101 IF( wntuo ) THEN
2102*
2103* If left singular vectors desired in A, generate left
2104* bidiagonalizing vectors in A
2105* (CWorkspace: need 3*N, prefer 2*N+N*NB)
2106* (RWorkspace: 0)
2107*
2108 CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
2109 $ work( iwork ), lwork-iwork+1, ierr )
2110 END IF
2111 IF( wntvo ) THEN
2112*
2113* If right singular vectors desired in A, generate right
2114* bidiagonalizing vectors in A
2115* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2116* (RWorkspace: 0)
2117*
2118 CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
2119 $ work( iwork ), lwork-iwork+1, ierr )
2120 END IF
2121 irwork = ie + n
2122 IF( wntuas .OR. wntuo )
2123 $ nru = m
2124 IF( wntun )
2125 $ nru = 0
2126 IF( wntvas .OR. wntvo )
2127 $ ncvt = n
2128 IF( wntvn )
2129 $ ncvt = 0
2130 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2131*
2132* Perform bidiagonal QR iteration, if desired, computing
2133* left singular vectors in U and computing right singular
2134* vectors in VT
2135* (CWorkspace: 0)
2136* (RWorkspace: need BDSPAC)
2137*
2138 CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2139 $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2140 $ info )
2141 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2142*
2143* Perform bidiagonal QR iteration, if desired, computing
2144* left singular vectors in U and computing right singular
2145* vectors in A
2146* (CWorkspace: 0)
2147* (RWorkspace: need BDSPAC)
2148*
2149 CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2150 $ lda, u, ldu, cdum, 1, rwork( irwork ),
2151 $ info )
2152 ELSE
2153*
2154* Perform bidiagonal QR iteration, if desired, computing
2155* left singular vectors in A and computing right singular
2156* vectors in VT
2157* (CWorkspace: 0)
2158* (RWorkspace: need BDSPAC)
2159*
2160 CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2161 $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2162 $ info )
2163 END IF
2164*
2165 END IF
2166*
2167 ELSE
2168*
2169* A has more columns than rows. If A has sufficiently more
2170* columns than rows, first reduce using the LQ decomposition (if
2171* sufficient workspace available)
2172*
2173 IF( n.GE.mnthr ) THEN
2174*
2175 IF( wntvn ) THEN
2176*
2177* Path 1t(N much larger than M, JOBVT='N')
2178* No right singular vectors to be computed
2179*
2180 itau = 1
2181 iwork = itau + m
2182*
2183* Compute A=L*Q
2184* (CWorkspace: need 2*M, prefer M+M*NB)
2185* (RWorkspace: 0)
2186*
2187 CALL zgelqf( m, n, a, lda, work( itau ), work( iwork ),
2188 $ lwork-iwork+1, ierr )
2189*
2190* Zero out above L
2191*
2192 CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2193 $ lda )
2194 ie = 1
2195 itauq = 1
2196 itaup = itauq + m
2197 iwork = itaup + m
2198*
2199* Bidiagonalize L in A
2200* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2201* (RWorkspace: need M)
2202*
2203 CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2204 $ work( itaup ), work( iwork ), lwork-iwork+1,
2205 $ ierr )
2206 IF( wntuo .OR. wntuas ) THEN
2207*
2208* If left singular vectors desired, generate Q
2209* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2210* (RWorkspace: 0)
2211*
2212 CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2213 $ work( iwork ), lwork-iwork+1, ierr )
2214 END IF
2215 irwork = ie + m
2216 nru = 0
2217 IF( wntuo .OR. wntuas )
2218 $ nru = m
2219*
2220* Perform bidiagonal QR iteration, computing left singular
2221* vectors of A in A if desired
2222* (CWorkspace: 0)
2223* (RWorkspace: need BDSPAC)
2224*
2225 CALL zbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2226 $ a, lda, cdum, 1, rwork( irwork ), info )
2227*
2228* If left singular vectors desired in U, copy them there
2229*
2230 IF( wntuas )
2231 $ CALL zlacpy( 'F', m, m, a, lda, u, ldu )
2232*
2233 ELSE IF( wntvo .AND. wntun ) THEN
2234*
2235* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2236* M right singular vectors to be overwritten on A and
2237* no left singular vectors to be computed
2238*
2239 IF( lwork.GE.m*m+3*m ) THEN
2240*
2241* Sufficient workspace for a fast algorithm
2242*
2243 ir = 1
2244 IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2245*
2246* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2247*
2248 ldwrku = lda
2249 chunk = n
2250 ldwrkr = lda
2251 ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2252*
2253* WORK(IU) is LDA by N and WORK(IR) is M by M
2254*
2255 ldwrku = lda
2256 chunk = n
2257 ldwrkr = m
2258 ELSE
2259*
2260* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2261*
2262 ldwrku = m
2263 chunk = ( lwork-m*m ) / m
2264 ldwrkr = m
2265 END IF
2266 itau = ir + ldwrkr*m
2267 iwork = itau + m
2268*
2269* Compute A=L*Q
2270* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2271* (RWorkspace: 0)
2272*
2273 CALL zgelqf( m, n, a, lda, work( itau ),
2274 $ work( iwork ), lwork-iwork+1, ierr )
2275*
2276* Copy L to WORK(IR) and zero out above it
2277*
2278 CALL zlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2279 CALL zlaset( 'U', m-1, m-1, czero, czero,
2280 $ work( ir+ldwrkr ), ldwrkr )
2281*
2282* Generate Q in A
2283* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2284* (RWorkspace: 0)
2285*
2286 CALL zunglq( m, n, m, a, lda, work( itau ),
2287 $ work( iwork ), lwork-iwork+1, ierr )
2288 ie = 1
2289 itauq = itau
2290 itaup = itauq + m
2291 iwork = itaup + m
2292*
2293* Bidiagonalize L in WORK(IR)
2294* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2295* (RWorkspace: need M)
2296*
2297 CALL zgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2298 $ work( itauq ), work( itaup ),
2299 $ work( iwork ), lwork-iwork+1, ierr )
2300*
2301* Generate right vectors bidiagonalizing L
2302* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2303* (RWorkspace: 0)
2304*
2305 CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2306 $ work( itaup ), work( iwork ),
2307 $ lwork-iwork+1, ierr )
2308 irwork = ie + m
2309*
2310* Perform bidiagonal QR iteration, computing right
2311* singular vectors of L in WORK(IR)
2312* (CWorkspace: need M*M)
2313* (RWorkspace: need BDSPAC)
2314*
2315 CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2316 $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2317 $ rwork( irwork ), info )
2318 iu = itauq
2319*
2320* Multiply right singular vectors of L in WORK(IR) by Q
2321* in A, storing result in WORK(IU) and copying to A
2322* (CWorkspace: need M*M+M, prefer M*M+M*N)
2323* (RWorkspace: 0)
2324*
2325 DO 30 i = 1, n, chunk
2326 blk = min( n-i+1, chunk )
2327 CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2328 $ ldwrkr, a( 1, i ), lda, czero,
2329 $ work( iu ), ldwrku )
2330 CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2331 $ a( 1, i ), lda )
2332 30 CONTINUE
2333*
2334 ELSE
2335*
2336* Insufficient workspace for a fast algorithm
2337*
2338 ie = 1
2339 itauq = 1
2340 itaup = itauq + m
2341 iwork = itaup + m
2342*
2343* Bidiagonalize A
2344* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2345* (RWorkspace: need M)
2346*
2347 CALL zgebrd( m, n, a, lda, s, rwork( ie ),
2348 $ work( itauq ), work( itaup ),
2349 $ work( iwork ), lwork-iwork+1, ierr )
2350*
2351* Generate right vectors bidiagonalizing A
2352* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2353* (RWorkspace: 0)
2354*
2355 CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
2356 $ work( iwork ), lwork-iwork+1, ierr )
2357 irwork = ie + m
2358*
2359* Perform bidiagonal QR iteration, computing right
2360* singular vectors of A in A
2361* (CWorkspace: 0)
2362* (RWorkspace: need BDSPAC)
2363*
2364 CALL zbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2365 $ cdum, 1, cdum, 1, rwork( irwork ), info )
2366*
2367 END IF
2368*
2369 ELSE IF( wntvo .AND. wntuas ) THEN
2370*
2371* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2372* M right singular vectors to be overwritten on A and
2373* M left singular vectors to be computed in U
2374*
2375 IF( lwork.GE.m*m+3*m ) THEN
2376*
2377* Sufficient workspace for a fast algorithm
2378*
2379 ir = 1
2380 IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2381*
2382* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2383*
2384 ldwrku = lda
2385 chunk = n
2386 ldwrkr = lda
2387 ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2388*
2389* WORK(IU) is LDA by N and WORK(IR) is M by M
2390*
2391 ldwrku = lda
2392 chunk = n
2393 ldwrkr = m
2394 ELSE
2395*
2396* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2397*
2398 ldwrku = m
2399 chunk = ( lwork-m*m ) / m
2400 ldwrkr = m
2401 END IF
2402 itau = ir + ldwrkr*m
2403 iwork = itau + m
2404*
2405* Compute A=L*Q
2406* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2407* (RWorkspace: 0)
2408*
2409 CALL zgelqf( m, n, a, lda, work( itau ),
2410 $ work( iwork ), lwork-iwork+1, ierr )
2411*
2412* Copy L to U, zeroing about above it
2413*
2414 CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2415 CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2416 $ ldu )
2417*
2418* Generate Q in A
2419* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2420* (RWorkspace: 0)
2421*
2422 CALL zunglq( m, n, m, a, lda, work( itau ),
2423 $ work( iwork ), lwork-iwork+1, ierr )
2424 ie = 1
2425 itauq = itau
2426 itaup = itauq + m
2427 iwork = itaup + m
2428*
2429* Bidiagonalize L in U, copying result to WORK(IR)
2430* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2431* (RWorkspace: need M)
2432*
2433 CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2434 $ work( itauq ), work( itaup ),
2435 $ work( iwork ), lwork-iwork+1, ierr )
2436 CALL zlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2437*
2438* Generate right vectors bidiagonalizing L in WORK(IR)
2439* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2440* (RWorkspace: 0)
2441*
2442 CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2443 $ work( itaup ), work( iwork ),
2444 $ lwork-iwork+1, ierr )
2445*
2446* Generate left vectors bidiagonalizing L in U
2447* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2448* (RWorkspace: 0)
2449*
2450 CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2451 $ work( iwork ), lwork-iwork+1, ierr )
2452 irwork = ie + m
2453*
2454* Perform bidiagonal QR iteration, computing left
2455* singular vectors of L in U, and computing right
2456* singular vectors of L in WORK(IR)
2457* (CWorkspace: need M*M)
2458* (RWorkspace: need BDSPAC)
2459*
2460 CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2461 $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2462 $ rwork( irwork ), info )
2463 iu = itauq
2464*
2465* Multiply right singular vectors of L in WORK(IR) by Q
2466* in A, storing result in WORK(IU) and copying to A
2467* (CWorkspace: need M*M+M, prefer M*M+M*N))
2468* (RWorkspace: 0)
2469*
2470 DO 40 i = 1, n, chunk
2471 blk = min( n-i+1, chunk )
2472 CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2473 $ ldwrkr, a( 1, i ), lda, czero,
2474 $ work( iu ), ldwrku )
2475 CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2476 $ a( 1, i ), lda )
2477 40 CONTINUE
2478*
2479 ELSE
2480*
2481* Insufficient workspace for a fast algorithm
2482*
2483 itau = 1
2484 iwork = itau + m
2485*
2486* Compute A=L*Q
2487* (CWorkspace: need 2*M, prefer M+M*NB)
2488* (RWorkspace: 0)
2489*
2490 CALL zgelqf( m, n, a, lda, work( itau ),
2491 $ work( iwork ), lwork-iwork+1, ierr )
2492*
2493* Copy L to U, zeroing out above it
2494*
2495 CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2496 CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2497 $ ldu )
2498*
2499* Generate Q in A
2500* (CWorkspace: need 2*M, prefer M+M*NB)
2501* (RWorkspace: 0)
2502*
2503 CALL zunglq( m, n, m, a, lda, work( itau ),
2504 $ work( iwork ), lwork-iwork+1, ierr )
2505 ie = 1
2506 itauq = itau
2507 itaup = itauq + m
2508 iwork = itaup + m
2509*
2510* Bidiagonalize L in U
2511* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2512* (RWorkspace: need M)
2513*
2514 CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2515 $ work( itauq ), work( itaup ),
2516 $ work( iwork ), lwork-iwork+1, ierr )
2517*
2518* Multiply right vectors bidiagonalizing L by Q in A
2519* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2520* (RWorkspace: 0)
2521*
2522 CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2523 $ work( itaup ), a, lda, work( iwork ),
2524 $ lwork-iwork+1, ierr )
2525*
2526* Generate left vectors bidiagonalizing L in U
2527* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2528* (RWorkspace: 0)
2529*
2530 CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2531 $ work( iwork ), lwork-iwork+1, ierr )
2532 irwork = ie + m
2533*
2534* Perform bidiagonal QR iteration, computing left
2535* singular vectors of A in U and computing right
2536* singular vectors of A in A
2537* (CWorkspace: 0)
2538* (RWorkspace: need BDSPAC)
2539*
2540 CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2541 $ u, ldu, cdum, 1, rwork( irwork ), info )
2542*
2543 END IF
2544*
2545 ELSE IF( wntvs ) THEN
2546*
2547 IF( wntun ) THEN
2548*
2549* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2550* M right singular vectors to be computed in VT and
2551* no left singular vectors to be computed
2552*
2553 IF( lwork.GE.m*m+3*m ) THEN
2554*
2555* Sufficient workspace for a fast algorithm
2556*
2557 ir = 1
2558 IF( lwork.GE.wrkbl+lda*m ) THEN
2559*
2560* WORK(IR) is LDA by M
2561*
2562 ldwrkr = lda
2563 ELSE
2564*
2565* WORK(IR) is M by M
2566*
2567 ldwrkr = m
2568 END IF
2569 itau = ir + ldwrkr*m
2570 iwork = itau + m
2571*
2572* Compute A=L*Q
2573* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2574* (RWorkspace: 0)
2575*
2576 CALL zgelqf( m, n, a, lda, work( itau ),
2577 $ work( iwork ), lwork-iwork+1, ierr )
2578*
2579* Copy L to WORK(IR), zeroing out above it
2580*
2581 CALL zlacpy( 'L', m, m, a, lda, work( ir ),
2582 $ ldwrkr )
2583 CALL zlaset( 'U', m-1, m-1, czero, czero,
2584 $ work( ir+ldwrkr ), ldwrkr )
2585*
2586* Generate Q in A
2587* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2588* (RWorkspace: 0)
2589*
2590 CALL zunglq( m, n, m, a, lda, work( itau ),
2591 $ work( iwork ), lwork-iwork+1, ierr )
2592 ie = 1
2593 itauq = itau
2594 itaup = itauq + m
2595 iwork = itaup + m
2596*
2597* Bidiagonalize L in WORK(IR)
2598* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2599* (RWorkspace: need M)
2600*
2601 CALL zgebrd( m, m, work( ir ), ldwrkr, s,
2602 $ rwork( ie ), work( itauq ),
2603 $ work( itaup ), work( iwork ),
2604 $ lwork-iwork+1, ierr )
2605*
2606* Generate right vectors bidiagonalizing L in
2607* WORK(IR)
2608* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2609* (RWorkspace: 0)
2610*
2611 CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2612 $ work( itaup ), work( iwork ),
2613 $ lwork-iwork+1, ierr )
2614 irwork = ie + m
2615*
2616* Perform bidiagonal QR iteration, computing right
2617* singular vectors of L in WORK(IR)
2618* (CWorkspace: need M*M)
2619* (RWorkspace: need BDSPAC)
2620*
2621 CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2622 $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2623 $ rwork( irwork ), info )
2624*
2625* Multiply right singular vectors of L in WORK(IR) by
2626* Q in A, storing result in VT
2627* (CWorkspace: need M*M)
2628* (RWorkspace: 0)
2629*
2630 CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
2631 $ ldwrkr, a, lda, czero, vt, ldvt )
2632*
2633 ELSE
2634*
2635* Insufficient workspace for a fast algorithm
2636*
2637 itau = 1
2638 iwork = itau + m
2639*
2640* Compute A=L*Q
2641* (CWorkspace: need 2*M, prefer M+M*NB)
2642* (RWorkspace: 0)
2643*
2644 CALL zgelqf( m, n, a, lda, work( itau ),
2645 $ work( iwork ), lwork-iwork+1, ierr )
2646*
2647* Copy result to VT
2648*
2649 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2650*
2651* Generate Q in VT
2652* (CWorkspace: need 2*M, prefer M+M*NB)
2653* (RWorkspace: 0)
2654*
2655 CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2656 $ work( iwork ), lwork-iwork+1, ierr )
2657 ie = 1
2658 itauq = itau
2659 itaup = itauq + m
2660 iwork = itaup + m
2661*
2662* Zero out above L in A
2663*
2664 CALL zlaset( 'U', m-1, m-1, czero, czero,
2665 $ a( 1, 2 ), lda )
2666*
2667* Bidiagonalize L in A
2668* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2669* (RWorkspace: need M)
2670*
2671 CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2672 $ work( itauq ), work( itaup ),
2673 $ work( iwork ), lwork-iwork+1, ierr )
2674*
2675* Multiply right vectors bidiagonalizing L by Q in VT
2676* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2677* (RWorkspace: 0)
2678*
2679 CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2680 $ work( itaup ), vt, ldvt,
2681 $ work( iwork ), lwork-iwork+1, ierr )
2682 irwork = ie + m
2683*
2684* Perform bidiagonal QR iteration, computing right
2685* singular vectors of A in VT
2686* (CWorkspace: 0)
2687* (RWorkspace: need BDSPAC)
2688*
2689 CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2690 $ ldvt, cdum, 1, cdum, 1,
2691 $ rwork( irwork ), info )
2692*
2693 END IF
2694*
2695 ELSE IF( wntuo ) THEN
2696*
2697* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2698* M right singular vectors to be computed in VT and
2699* M left singular vectors to be overwritten on A
2700*
2701 IF( lwork.GE.2*m*m+3*m ) THEN
2702*
2703* Sufficient workspace for a fast algorithm
2704*
2705 iu = 1
2706 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2707*
2708* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2709*
2710 ldwrku = lda
2711 ir = iu + ldwrku*m
2712 ldwrkr = lda
2713 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2714*
2715* WORK(IU) is LDA by M and WORK(IR) is M by M
2716*
2717 ldwrku = lda
2718 ir = iu + ldwrku*m
2719 ldwrkr = m
2720 ELSE
2721*
2722* WORK(IU) is M by M and WORK(IR) is M by M
2723*
2724 ldwrku = m
2725 ir = iu + ldwrku*m
2726 ldwrkr = m
2727 END IF
2728 itau = ir + ldwrkr*m
2729 iwork = itau + m
2730*
2731* Compute A=L*Q
2732* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2733* (RWorkspace: 0)
2734*
2735 CALL zgelqf( m, n, a, lda, work( itau ),
2736 $ work( iwork ), lwork-iwork+1, ierr )
2737*
2738* Copy L to WORK(IU), zeroing out below it
2739*
2740 CALL zlacpy( 'L', m, m, a, lda, work( iu ),
2741 $ ldwrku )
2742 CALL zlaset( 'U', m-1, m-1, czero, czero,
2743 $ work( iu+ldwrku ), ldwrku )
2744*
2745* Generate Q in A
2746* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2747* (RWorkspace: 0)
2748*
2749 CALL zunglq( m, n, m, a, lda, work( itau ),
2750 $ work( iwork ), lwork-iwork+1, ierr )
2751 ie = 1
2752 itauq = itau
2753 itaup = itauq + m
2754 iwork = itaup + m
2755*
2756* Bidiagonalize L in WORK(IU), copying result to
2757* WORK(IR)
2758* (CWorkspace: need 2*M*M+3*M,
2759* prefer 2*M*M+2*M+2*M*NB)
2760* (RWorkspace: need M)
2761*
2762 CALL zgebrd( m, m, work( iu ), ldwrku, s,
2763 $ rwork( ie ), work( itauq ),
2764 $ work( itaup ), work( iwork ),
2765 $ lwork-iwork+1, ierr )
2766 CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
2767 $ work( ir ), ldwrkr )
2768*
2769* Generate right bidiagonalizing vectors in WORK(IU)
2770* (CWorkspace: need 2*M*M+3*M-1,
2771* prefer 2*M*M+2*M+(M-1)*NB)
2772* (RWorkspace: 0)
2773*
2774 CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
2775 $ work( itaup ), work( iwork ),
2776 $ lwork-iwork+1, ierr )
2777*
2778* Generate left bidiagonalizing vectors in WORK(IR)
2779* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2780* (RWorkspace: 0)
2781*
2782 CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2783 $ work( itauq ), work( iwork ),
2784 $ lwork-iwork+1, ierr )
2785 irwork = ie + m
2786*
2787* Perform bidiagonal QR iteration, computing left
2788* singular vectors of L in WORK(IR) and computing
2789* right singular vectors of L in WORK(IU)
2790* (CWorkspace: need 2*M*M)
2791* (RWorkspace: need BDSPAC)
2792*
2793 CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2794 $ work( iu ), ldwrku, work( ir ),
2795 $ ldwrkr, cdum, 1, rwork( irwork ),
2796 $ info )
2797*
2798* Multiply right singular vectors of L in WORK(IU) by
2799* Q in A, storing result in VT
2800* (CWorkspace: need M*M)
2801* (RWorkspace: 0)
2802*
2803 CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
2804 $ ldwrku, a, lda, czero, vt, ldvt )
2805*
2806* Copy left singular vectors of L to A
2807* (CWorkspace: need M*M)
2808* (RWorkspace: 0)
2809*
2810 CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2811 $ lda )
2812*
2813 ELSE
2814*
2815* Insufficient workspace for a fast algorithm
2816*
2817 itau = 1
2818 iwork = itau + m
2819*
2820* Compute A=L*Q, copying result to VT
2821* (CWorkspace: need 2*M, prefer M+M*NB)
2822* (RWorkspace: 0)
2823*
2824 CALL zgelqf( m, n, a, lda, work( itau ),
2825 $ work( iwork ), lwork-iwork+1, ierr )
2826 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2827*
2828* Generate Q in VT
2829* (CWorkspace: need 2*M, prefer M+M*NB)
2830* (RWorkspace: 0)
2831*
2832 CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2833 $ work( iwork ), lwork-iwork+1, ierr )
2834 ie = 1
2835 itauq = itau
2836 itaup = itauq + m
2837 iwork = itaup + m
2838*
2839* Zero out above L in A
2840*
2841 CALL zlaset( 'U', m-1, m-1, czero, czero,
2842 $ a( 1, 2 ), lda )
2843*
2844* Bidiagonalize L in A
2845* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2846* (RWorkspace: need M)
2847*
2848 CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2849 $ work( itauq ), work( itaup ),
2850 $ work( iwork ), lwork-iwork+1, ierr )
2851*
2852* Multiply right vectors bidiagonalizing L by Q in VT
2853* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2854* (RWorkspace: 0)
2855*
2856 CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2857 $ work( itaup ), vt, ldvt,
2858 $ work( iwork ), lwork-iwork+1, ierr )
2859*
2860* Generate left bidiagonalizing vectors of L in A
2861* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2862* (RWorkspace: 0)
2863*
2864 CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2865 $ work( iwork ), lwork-iwork+1, ierr )
2866 irwork = ie + m
2867*
2868* Perform bidiagonal QR iteration, computing left
2869* singular vectors of A in A and computing right
2870* singular vectors of A in VT
2871* (CWorkspace: 0)
2872* (RWorkspace: need BDSPAC)
2873*
2874 CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2875 $ ldvt, a, lda, cdum, 1,
2876 $ rwork( irwork ), info )
2877*
2878 END IF
2879*
2880 ELSE IF( wntuas ) THEN
2881*
2882* Path 6t(N much larger than M, JOBU='S' or 'A',
2883* JOBVT='S')
2884* M right singular vectors to be computed in VT and
2885* M left singular vectors to be computed in U
2886*
2887 IF( lwork.GE.m*m+3*m ) THEN
2888*
2889* Sufficient workspace for a fast algorithm
2890*
2891 iu = 1
2892 IF( lwork.GE.wrkbl+lda*m ) THEN
2893*
2894* WORK(IU) is LDA by N
2895*
2896 ldwrku = lda
2897 ELSE
2898*
2899* WORK(IU) is LDA by M
2900*
2901 ldwrku = m
2902 END IF
2903 itau = iu + ldwrku*m
2904 iwork = itau + m
2905*
2906* Compute A=L*Q
2907* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2908* (RWorkspace: 0)
2909*
2910 CALL zgelqf( m, n, a, lda, work( itau ),
2911 $ work( iwork ), lwork-iwork+1, ierr )
2912*
2913* Copy L to WORK(IU), zeroing out above it
2914*
2915 CALL zlacpy( 'L', m, m, a, lda, work( iu ),
2916 $ ldwrku )
2917 CALL zlaset( 'U', m-1, m-1, czero, czero,
2918 $ work( iu+ldwrku ), ldwrku )
2919*
2920* Generate Q in A
2921* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2922* (RWorkspace: 0)
2923*
2924 CALL zunglq( m, n, m, a, lda, work( itau ),
2925 $ work( iwork ), lwork-iwork+1, ierr )
2926 ie = 1
2927 itauq = itau
2928 itaup = itauq + m
2929 iwork = itaup + m
2930*
2931* Bidiagonalize L in WORK(IU), copying result to U
2932* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2933* (RWorkspace: need M)
2934*
2935 CALL zgebrd( m, m, work( iu ), ldwrku, s,
2936 $ rwork( ie ), work( itauq ),
2937 $ work( itaup ), work( iwork ),
2938 $ lwork-iwork+1, ierr )
2939 CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
2940 $ ldu )
2941*
2942* Generate right bidiagonalizing vectors in WORK(IU)
2943* (CWorkspace: need M*M+3*M-1,
2944* prefer M*M+2*M+(M-1)*NB)
2945* (RWorkspace: 0)
2946*
2947 CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
2948 $ work( itaup ), work( iwork ),
2949 $ lwork-iwork+1, ierr )
2950*
2951* Generate left bidiagonalizing vectors in U
2952* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2953* (RWorkspace: 0)
2954*
2955 CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2956 $ work( iwork ), lwork-iwork+1, ierr )
2957 irwork = ie + m
2958*
2959* Perform bidiagonal QR iteration, computing left
2960* singular vectors of L in U and computing right
2961* singular vectors of L in WORK(IU)
2962* (CWorkspace: need M*M)
2963* (RWorkspace: need BDSPAC)
2964*
2965 CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2966 $ work( iu ), ldwrku, u, ldu, cdum, 1,
2967 $ rwork( irwork ), info )
2968*
2969* Multiply right singular vectors of L in WORK(IU) by
2970* Q in A, storing result in VT
2971* (CWorkspace: need M*M)
2972* (RWorkspace: 0)
2973*
2974 CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
2975 $ ldwrku, a, lda, czero, vt, ldvt )
2976*
2977 ELSE
2978*
2979* Insufficient workspace for a fast algorithm
2980*
2981 itau = 1
2982 iwork = itau + m
2983*
2984* Compute A=L*Q, copying result to VT
2985* (CWorkspace: need 2*M, prefer M+M*NB)
2986* (RWorkspace: 0)
2987*
2988 CALL zgelqf( m, n, a, lda, work( itau ),
2989 $ work( iwork ), lwork-iwork+1, ierr )
2990 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2991*
2992* Generate Q in VT
2993* (CWorkspace: need 2*M, prefer M+M*NB)
2994* (RWorkspace: 0)
2995*
2996 CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2997 $ work( iwork ), lwork-iwork+1, ierr )
2998*
2999* Copy L to U, zeroing out above it
3000*
3001 CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3002 CALL zlaset( 'U', m-1, m-1, czero, czero,
3003 $ u( 1, 2 ), ldu )
3004 ie = 1
3005 itauq = itau
3006 itaup = itauq + m
3007 iwork = itaup + m
3008*
3009* Bidiagonalize L in U
3010* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3011* (RWorkspace: need M)
3012*
3013 CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
3014 $ work( itauq ), work( itaup ),
3015 $ work( iwork ), lwork-iwork+1, ierr )
3016*
3017* Multiply right bidiagonalizing vectors in U by Q
3018* in VT
3019* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3020* (RWorkspace: 0)
3021*
3022 CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3023 $ work( itaup ), vt, ldvt,
3024 $ work( iwork ), lwork-iwork+1, ierr )
3025*
3026* Generate left bidiagonalizing vectors in U
3027* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3028* (RWorkspace: 0)
3029*
3030 CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3031 $ work( iwork ), lwork-iwork+1, ierr )
3032 irwork = ie + m
3033*
3034* Perform bidiagonal QR iteration, computing left
3035* singular vectors of A in U and computing right
3036* singular vectors of A in VT
3037* (CWorkspace: 0)
3038* (RWorkspace: need BDSPAC)
3039*
3040 CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3041 $ ldvt, u, ldu, cdum, 1,
3042 $ rwork( irwork ), info )
3043*
3044 END IF
3045*
3046 END IF
3047*
3048 ELSE IF( wntva ) THEN
3049*
3050 IF( wntun ) THEN
3051*
3052* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3053* N right singular vectors to be computed in VT and
3054* no left singular vectors to be computed
3055*
3056 IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3057*
3058* Sufficient workspace for a fast algorithm
3059*
3060 ir = 1
3061 IF( lwork.GE.wrkbl+lda*m ) THEN
3062*
3063* WORK(IR) is LDA by M
3064*
3065 ldwrkr = lda
3066 ELSE
3067*
3068* WORK(IR) is M by M
3069*
3070 ldwrkr = m
3071 END IF
3072 itau = ir + ldwrkr*m
3073 iwork = itau + m
3074*
3075* Compute A=L*Q, copying result to VT
3076* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3077* (RWorkspace: 0)
3078*
3079 CALL zgelqf( m, n, a, lda, work( itau ),
3080 $ work( iwork ), lwork-iwork+1, ierr )
3081 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3082*
3083* Copy L to WORK(IR), zeroing out above it
3084*
3085 CALL zlacpy( 'L', m, m, a, lda, work( ir ),
3086 $ ldwrkr )
3087 CALL zlaset( 'U', m-1, m-1, czero, czero,
3088 $ work( ir+ldwrkr ), ldwrkr )
3089*
3090* Generate Q in VT
3091* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3092* (RWorkspace: 0)
3093*
3094 CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3095 $ work( iwork ), lwork-iwork+1, ierr )
3096 ie = 1
3097 itauq = itau
3098 itaup = itauq + m
3099 iwork = itaup + m
3100*
3101* Bidiagonalize L in WORK(IR)
3102* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3103* (RWorkspace: need M)
3104*
3105 CALL zgebrd( m, m, work( ir ), ldwrkr, s,
3106 $ rwork( ie ), work( itauq ),
3107 $ work( itaup ), work( iwork ),
3108 $ lwork-iwork+1, ierr )
3109*
3110* Generate right bidiagonalizing vectors in WORK(IR)
3111* (CWorkspace: need M*M+3*M-1,
3112* prefer M*M+2*M+(M-1)*NB)
3113* (RWorkspace: 0)
3114*
3115 CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
3116 $ work( itaup ), work( iwork ),
3117 $ lwork-iwork+1, ierr )
3118 irwork = ie + m
3119*
3120* Perform bidiagonal QR iteration, computing right
3121* singular vectors of L in WORK(IR)
3122* (CWorkspace: need M*M)
3123* (RWorkspace: need BDSPAC)
3124*
3125 CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
3126 $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
3127 $ rwork( irwork ), info )
3128*
3129* Multiply right singular vectors of L in WORK(IR) by
3130* Q in VT, storing result in A
3131* (CWorkspace: need M*M)
3132* (RWorkspace: 0)
3133*
3134 CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
3135 $ ldwrkr, vt, ldvt, czero, a, lda )
3136*
3137* Copy right singular vectors of A from A to VT
3138*
3139 CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3140*
3141 ELSE
3142*
3143* Insufficient workspace for a fast algorithm
3144*
3145 itau = 1
3146 iwork = itau + m
3147*
3148* Compute A=L*Q, copying result to VT
3149* (CWorkspace: need 2*M, prefer M+M*NB)
3150* (RWorkspace: 0)
3151*
3152 CALL zgelqf( m, n, a, lda, work( itau ),
3153 $ work( iwork ), lwork-iwork+1, ierr )
3154 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3155*
3156* Generate Q in VT
3157* (CWorkspace: need M+N, prefer M+N*NB)
3158* (RWorkspace: 0)
3159*
3160 CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3161 $ work( iwork ), lwork-iwork+1, ierr )
3162 ie = 1
3163 itauq = itau
3164 itaup = itauq + m
3165 iwork = itaup + m
3166*
3167* Zero out above L in A
3168*
3169 CALL zlaset( 'U', m-1, m-1, czero, czero,
3170 $ a( 1, 2 ), lda )
3171*
3172* Bidiagonalize L in A
3173* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3174* (RWorkspace: need M)
3175*
3176 CALL zgebrd( m, m, a, lda, s, rwork( ie ),
3177 $ work( itauq ), work( itaup ),
3178 $ work( iwork ), lwork-iwork+1, ierr )
3179*
3180* Multiply right bidiagonalizing vectors in A by Q
3181* in VT
3182* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3183* (RWorkspace: 0)
3184*
3185 CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3186 $ work( itaup ), vt, ldvt,
3187 $ work( iwork ), lwork-iwork+1, ierr )
3188 irwork = ie + m
3189*
3190* Perform bidiagonal QR iteration, computing right
3191* singular vectors of A in VT
3192* (CWorkspace: 0)
3193* (RWorkspace: need BDSPAC)
3194*
3195 CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
3196 $ ldvt, cdum, 1, cdum, 1,
3197 $ rwork( irwork ), info )
3198*
3199 END IF
3200*
3201 ELSE IF( wntuo ) THEN
3202*
3203* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3204* N right singular vectors to be computed in VT and
3205* M left singular vectors to be overwritten on A
3206*
3207 IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
3208*
3209* Sufficient workspace for a fast algorithm
3210*
3211 iu = 1
3212 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3213*
3214* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3215*
3216 ldwrku = lda
3217 ir = iu + ldwrku*m
3218 ldwrkr = lda
3219 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3220*
3221* WORK(IU) is LDA by M and WORK(IR) is M by M
3222*
3223 ldwrku = lda
3224 ir = iu + ldwrku*m
3225 ldwrkr = m
3226 ELSE
3227*
3228* WORK(IU) is M by M and WORK(IR) is M by M
3229*
3230 ldwrku = m
3231 ir = iu + ldwrku*m
3232 ldwrkr = m
3233 END IF
3234 itau = ir + ldwrkr*m
3235 iwork = itau + m
3236*
3237* Compute A=L*Q, copying result to VT
3238* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3239* (RWorkspace: 0)
3240*
3241 CALL zgelqf( m, n, a, lda, work( itau ),
3242 $ work( iwork ), lwork-iwork+1, ierr )
3243 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3244*
3245* Generate Q in VT
3246* (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3247* (RWorkspace: 0)
3248*
3249 CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3250 $ work( iwork ), lwork-iwork+1, ierr )
3251*
3252* Copy L to WORK(IU), zeroing out above it
3253*
3254 CALL zlacpy( 'L', m, m, a, lda, work( iu ),
3255 $ ldwrku )
3256 CALL zlaset( 'U', m-1, m-1, czero, czero,
3257 $ work( iu+ldwrku ), ldwrku )
3258 ie = 1
3259 itauq = itau
3260 itaup = itauq + m
3261 iwork = itaup + m
3262*
3263* Bidiagonalize L in WORK(IU), copying result to
3264* WORK(IR)
3265* (CWorkspace: need 2*M*M+3*M,
3266* prefer 2*M*M+2*M+2*M*NB)
3267* (RWorkspace: need M)
3268*
3269 CALL zgebrd( m, m, work( iu ), ldwrku, s,
3270 $ rwork( ie ), work( itauq ),
3271 $ work( itaup ), work( iwork ),
3272 $ lwork-iwork+1, ierr )
3273 CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
3274 $ work( ir ), ldwrkr )
3275*
3276* Generate right bidiagonalizing vectors in WORK(IU)
3277* (CWorkspace: need 2*M*M+3*M-1,
3278* prefer 2*M*M+2*M+(M-1)*NB)
3279* (RWorkspace: 0)
3280*
3281 CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
3282 $ work( itaup ), work( iwork ),
3283 $ lwork-iwork+1, ierr )
3284*
3285* Generate left bidiagonalizing vectors in WORK(IR)
3286* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3287* (RWorkspace: 0)
3288*
3289 CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
3290 $ work( itauq ), work( iwork ),
3291 $ lwork-iwork+1, ierr )
3292 irwork = ie + m
3293*
3294* Perform bidiagonal QR iteration, computing left
3295* singular vectors of L in WORK(IR) and computing
3296* right singular vectors of L in WORK(IU)
3297* (CWorkspace: need 2*M*M)
3298* (RWorkspace: need BDSPAC)
3299*
3300 CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3301 $ work( iu ), ldwrku, work( ir ),
3302 $ ldwrkr, cdum, 1, rwork( irwork ),
3303 $ info )
3304*
3305* Multiply right singular vectors of L in WORK(IU) by
3306* Q in VT, storing result in A
3307* (CWorkspace: need M*M)
3308* (RWorkspace: 0)
3309*
3310 CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
3311 $ ldwrku, vt, ldvt, czero, a, lda )
3312*
3313* Copy right singular vectors of A from A to VT
3314*
3315 CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3316*
3317* Copy left singular vectors of A from WORK(IR) to A
3318*
3319 CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3320 $ lda )
3321*
3322 ELSE
3323*
3324* Insufficient workspace for a fast algorithm
3325*
3326 itau = 1
3327 iwork = itau + m
3328*
3329* Compute A=L*Q, copying result to VT
3330* (CWorkspace: need 2*M, prefer M+M*NB)
3331* (RWorkspace: 0)
3332*
3333 CALL zgelqf( m, n, a, lda, work( itau ),
3334 $ work( iwork ), lwork-iwork+1, ierr )
3335 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3336*
3337* Generate Q in VT
3338* (CWorkspace: need M+N, prefer M+N*NB)
3339* (RWorkspace: 0)
3340*
3341 CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3342 $ work( iwork ), lwork-iwork+1, ierr )
3343 ie = 1
3344 itauq = itau
3345 itaup = itauq + m
3346 iwork = itaup + m
3347*
3348* Zero out above L in A
3349*
3350 CALL zlaset( 'U', m-1, m-1, czero, czero,
3351 $ a( 1, 2 ), lda )
3352*
3353* Bidiagonalize L in A
3354* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3355* (RWorkspace: need M)
3356*
3357 CALL zgebrd( m, m, a, lda, s, rwork( ie ),
3358 $ work( itauq ), work( itaup ),
3359 $ work( iwork ), lwork-iwork+1, ierr )
3360*
3361* Multiply right bidiagonalizing vectors in A by Q
3362* in VT
3363* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3364* (RWorkspace: 0)
3365*
3366 CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3367 $ work( itaup ), vt, ldvt,
3368 $ work( iwork ), lwork-iwork+1, ierr )
3369*
3370* Generate left bidiagonalizing vectors in A
3371* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3372* (RWorkspace: 0)
3373*
3374 CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
3375 $ work( iwork ), lwork-iwork+1, ierr )
3376 irwork = ie + m
3377*
3378* Perform bidiagonal QR iteration, computing left
3379* singular vectors of A in A and computing right
3380* singular vectors of A in VT
3381* (CWorkspace: 0)
3382* (RWorkspace: need BDSPAC)
3383*
3384 CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3385 $ ldvt, a, lda, cdum, 1,
3386 $ rwork( irwork ), info )
3387*
3388 END IF
3389*
3390 ELSE IF( wntuas ) THEN
3391*
3392* Path 9t(N much larger than M, JOBU='S' or 'A',
3393* JOBVT='A')
3394* N right singular vectors to be computed in VT and
3395* M left singular vectors to be computed in U
3396*
3397 IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3398*
3399* Sufficient workspace for a fast algorithm
3400*
3401 iu = 1
3402 IF( lwork.GE.wrkbl+lda*m ) THEN
3403*
3404* WORK(IU) is LDA by M
3405*
3406 ldwrku = lda
3407 ELSE
3408*
3409* WORK(IU) is M by M
3410*
3411 ldwrku = m
3412 END IF
3413 itau = iu + ldwrku*m
3414 iwork = itau + m
3415*
3416* Compute A=L*Q, copying result to VT
3417* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3418* (RWorkspace: 0)
3419*
3420 CALL zgelqf( m, n, a, lda, work( itau ),
3421 $ work( iwork ), lwork-iwork+1, ierr )
3422 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3423*
3424* Generate Q in VT
3425* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3426* (RWorkspace: 0)
3427*
3428 CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3429 $ work( iwork ), lwork-iwork+1, ierr )
3430*
3431* Copy L to WORK(IU), zeroing out above it
3432*
3433 CALL zlacpy( 'L', m, m, a, lda, work( iu ),
3434 $ ldwrku )
3435 CALL zlaset( 'U', m-1, m-1, czero, czero,
3436 $ work( iu+ldwrku ), ldwrku )
3437 ie = 1
3438 itauq = itau
3439 itaup = itauq + m
3440 iwork = itaup + m
3441*
3442* Bidiagonalize L in WORK(IU), copying result to U
3443* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3444* (RWorkspace: need M)
3445*
3446 CALL zgebrd( m, m, work( iu ), ldwrku, s,
3447 $ rwork( ie ), work( itauq ),
3448 $ work( itaup ), work( iwork ),
3449 $ lwork-iwork+1, ierr )
3450 CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
3451 $ ldu )
3452*
3453* Generate right bidiagonalizing vectors in WORK(IU)
3454* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3455* (RWorkspace: 0)
3456*
3457 CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
3458 $ work( itaup ), work( iwork ),
3459 $ lwork-iwork+1, ierr )
3460*
3461* Generate left bidiagonalizing vectors in U
3462* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3463* (RWorkspace: 0)
3464*
3465 CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3466 $ work( iwork ), lwork-iwork+1, ierr )
3467 irwork = ie + m
3468*
3469* Perform bidiagonal QR iteration, computing left
3470* singular vectors of L in U and computing right
3471* singular vectors of L in WORK(IU)
3472* (CWorkspace: need M*M)
3473* (RWorkspace: need BDSPAC)
3474*
3475 CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3476 $ work( iu ), ldwrku, u, ldu, cdum, 1,
3477 $ rwork( irwork ), info )
3478*
3479* Multiply right singular vectors of L in WORK(IU) by
3480* Q in VT, storing result in A
3481* (CWorkspace: need M*M)
3482* (RWorkspace: 0)
3483*
3484 CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
3485 $ ldwrku, vt, ldvt, czero, a, lda )
3486*
3487* Copy right singular vectors of A from A to VT
3488*
3489 CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3490*
3491 ELSE
3492*
3493* Insufficient workspace for a fast algorithm
3494*
3495 itau = 1
3496 iwork = itau + m
3497*
3498* Compute A=L*Q, copying result to VT
3499* (CWorkspace: need 2*M, prefer M+M*NB)
3500* (RWorkspace: 0)
3501*
3502 CALL zgelqf( m, n, a, lda, work( itau ),
3503 $ work( iwork ), lwork-iwork+1, ierr )
3504 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3505*
3506* Generate Q in VT
3507* (CWorkspace: need M+N, prefer M+N*NB)
3508* (RWorkspace: 0)
3509*
3510 CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3511 $ work( iwork ), lwork-iwork+1, ierr )
3512*
3513* Copy L to U, zeroing out above it
3514*
3515 CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3516 CALL zlaset( 'U', m-1, m-1, czero, czero,
3517 $ u( 1, 2 ), ldu )
3518 ie = 1
3519 itauq = itau
3520 itaup = itauq + m
3521 iwork = itaup + m
3522*
3523* Bidiagonalize L in U
3524* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3525* (RWorkspace: need M)
3526*
3527 CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
3528 $ work( itauq ), work( itaup ),
3529 $ work( iwork ), lwork-iwork+1, ierr )
3530*
3531* Multiply right bidiagonalizing vectors in U by Q
3532* in VT
3533* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3534* (RWorkspace: 0)
3535*
3536 CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3537 $ work( itaup ), vt, ldvt,
3538 $ work( iwork ), lwork-iwork+1, ierr )
3539*
3540* Generate left bidiagonalizing vectors in U
3541* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3542* (RWorkspace: 0)
3543*
3544 CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3545 $ work( iwork ), lwork-iwork+1, ierr )
3546 irwork = ie + m
3547*
3548* Perform bidiagonal QR iteration, computing left
3549* singular vectors of A in U and computing right
3550* singular vectors of A in VT
3551* (CWorkspace: 0)
3552* (RWorkspace: need BDSPAC)
3553*
3554 CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3555 $ ldvt, u, ldu, cdum, 1,
3556 $ rwork( irwork ), info )
3557*
3558 END IF
3559*
3560 END IF
3561*
3562 END IF
3563*
3564 ELSE
3565*
3566* N .LT. MNTHR
3567*
3568* Path 10t(N greater than M, but not much larger)
3569* Reduce to bidiagonal form without LQ decomposition
3570*
3571 ie = 1
3572 itauq = 1
3573 itaup = itauq + m
3574 iwork = itaup + m
3575*
3576* Bidiagonalize A
3577* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3578* (RWorkspace: M)
3579*
3580 CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
3581 $ work( itaup ), work( iwork ), lwork-iwork+1,
3582 $ ierr )
3583 IF( wntuas ) THEN
3584*
3585* If left singular vectors desired in U, copy result to U
3586* and generate left bidiagonalizing vectors in U
3587* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3588* (RWorkspace: 0)
3589*
3590 CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3591 CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
3592 $ work( iwork ), lwork-iwork+1, ierr )
3593 END IF
3594 IF( wntvas ) THEN
3595*
3596* If right singular vectors desired in VT, copy result to
3597* VT and generate right bidiagonalizing vectors in VT
3598* (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3599* (RWorkspace: 0)
3600*
3601 CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3602 IF( wntva )
3603 $ nrvt = n
3604 IF( wntvs )
3605 $ nrvt = m
3606 CALL zungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3607 $ work( iwork ), lwork-iwork+1, ierr )
3608 END IF
3609 IF( wntuo ) THEN
3610*
3611* If left singular vectors desired in A, generate left
3612* bidiagonalizing vectors in A
3613* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3614* (RWorkspace: 0)
3615*
3616 CALL zungbr( 'Q', m, m, n, a, lda, work( itauq ),
3617 $ work( iwork ), lwork-iwork+1, ierr )
3618 END IF
3619 IF( wntvo ) THEN
3620*
3621* If right singular vectors desired in A, generate right
3622* bidiagonalizing vectors in A
3623* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3624* (RWorkspace: 0)
3625*
3626 CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
3627 $ work( iwork ), lwork-iwork+1, ierr )
3628 END IF
3629 irwork = ie + m
3630 IF( wntuas .OR. wntuo )
3631 $ nru = m
3632 IF( wntun )
3633 $ nru = 0
3634 IF( wntvas .OR. wntvo )
3635 $ ncvt = n
3636 IF( wntvn )
3637 $ ncvt = 0
3638 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3639*
3640* Perform bidiagonal QR iteration, if desired, computing
3641* left singular vectors in U and computing right singular
3642* vectors in VT
3643* (CWorkspace: 0)
3644* (RWorkspace: need BDSPAC)
3645*
3646 CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3647 $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
3648 $ info )
3649 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3650*
3651* Perform bidiagonal QR iteration, if desired, computing
3652* left singular vectors in U and computing right singular
3653* vectors in A
3654* (CWorkspace: 0)
3655* (RWorkspace: need BDSPAC)
3656*
3657 CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
3658 $ lda, u, ldu, cdum, 1, rwork( irwork ),
3659 $ info )
3660 ELSE
3661*
3662* Perform bidiagonal QR iteration, if desired, computing
3663* left singular vectors in A and computing right singular
3664* vectors in VT
3665* (CWorkspace: 0)
3666* (RWorkspace: need BDSPAC)
3667*
3668 CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3669 $ ldvt, a, lda, cdum, 1, rwork( irwork ),
3670 $ info )
3671 END IF
3672*
3673 END IF
3674*
3675 END IF
3676*
3677* Undo scaling if necessary
3678*
3679 IF( iscl.EQ.1 ) THEN
3680 IF( anrm.GT.bignum )
3681 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3682 $ ierr )
3683 IF( info.NE.0 .AND. anrm.GT.bignum )
3684 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3685 $ rwork( ie ), minmn, ierr )
3686 IF( anrm.LT.smlnum )
3687 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3688 $ ierr )
3689 IF( info.NE.0 .AND. anrm.LT.smlnum )
3690 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3691 $ rwork( ie ), minmn, ierr )
3692 END IF
3693*
3694* Return optimal workspace in WORK(1)
3695*
3696 work( 1 ) = maxwrk
3697*
3698 RETURN
3699*
3700* End of ZGESVD
3701*
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine zbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, rwork, info)
ZBDSQR
Definition zbdsqr.f:222

◆ zgesvdq()

subroutine zgesvdq ( character joba,
character jobp,
character jobr,
character jobu,
character jobv,
integer m,
integer n,
complex*16, dimension( lda, * ) a,
integer lda,
double precision, dimension( * ) s,
complex*16, dimension( ldu, * ) u,
integer ldu,
complex*16, dimension( ldv, * ) v,
integer ldv,
integer numrank,
integer, dimension( * ) iwork,
integer liwork,
complex*16, dimension( * ) cwork,
integer lcwork,
double precision, dimension( * ) rwork,
integer lrwork,
integer info )

ZGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method for GE matrices

Download ZGESVDQ + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> ZCGESVDQ computes the singular value decomposition (SVD) of a complex
!> M-by-N matrix A, where M >= N. The SVD of A is written as
!>                                    [++]   [xx]   [x0]   [xx]
!>              A = U * SIGMA * V^*,  [++] = [xx] * [ox] * [xx]
!>                                    [++]   [xx]
!> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal
!> matrix, and V is an N-by-N unitary matrix. The diagonal elements
!> of SIGMA are the singular values of A. The columns of U and V are the
!> left and the right singular vectors of A, respectively.
!> 
Parameters
[in]JOBA
!>  JOBA is CHARACTER*1
!>  Specifies the level of accuracy in the computed SVD
!>  = 'A' The requested accuracy corresponds to having the backward
!>        error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F,
!>        where EPS = DLAMCH('Epsilon'). This authorises ZGESVDQ to
!>        truncate the computed triangular factor in a rank revealing
!>        QR factorization whenever the truncated part is below the
!>        threshold of the order of EPS * ||A||_F. This is aggressive
!>        truncation level.
!>  = 'M' Similarly as with 'A', but the truncation is more gentle: it
!>        is allowed only when there is a drop on the diagonal of the
!>        triangular factor in the QR factorization. This is medium
!>        truncation level.
!>  = 'H' High accuracy requested. No numerical rank determination based
!>        on the rank revealing QR factorization is attempted.
!>  = 'E' Same as 'H', and in addition the condition number of column
!>        scaled A is estimated and returned in  RWORK(1).
!>        N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1)
!> 
[in]JOBP
!>  JOBP is CHARACTER*1
!>  = 'P' The rows of A are ordered in decreasing order with respect to
!>        ||A(i,:)||_\infty. This enhances numerical accuracy at the cost
!>        of extra data movement. Recommended for numerical robustness.
!>  = 'N' No row pivoting.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>          = 'T' After the initial pivoted QR factorization, ZGESVD is applied to
!>          the adjoint R**H of the computed triangular factor R. This involves
!>          some extra data movement (matrix transpositions). Useful for
!>          experiments, research and development.
!>          = 'N' The triangular factor R is given as input to CGESVD. This may be
!>          preferred as it involves less data movement.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>          = 'A' All M left singular vectors are computed and returned in the
!>          matrix U. See the description of U.
!>          = 'S' or 'U' N = min(M,N) left singular vectors are computed and returned
!>          in the matrix U. See the description of U.
!>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular
!>          vectors are computed and returned in the matrix U.
!>          = 'F' The N left singular vectors are returned in factored form as the
!>          product of the Q factor from the initial QR factorization and the
!>          N left singular vectors of (R**H , 0)**H. If row pivoting is used,
!>          then the necessary information on the row pivoting is stored in
!>          IWORK(N+1:N+M-1).
!>          = 'N' The left singular vectors are not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>          = 'A', 'V' All N right singular vectors are computed and returned in
!>          the matrix V.
!>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular
!>          vectors are computed and returned in the matrix V. This option is
!>          allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal.
!>          = 'N' The right singular vectors are not computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  M >= N >= 0.
!> 
[in,out]A
!>          A is COMPLEX*16 array of dimensions LDA x N
!>          On entry, the input matrix A.
!>          On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains
!>          the Householder vectors as stored by ZGEQP3. If JOBU = 'F', these Householder
!>          vectors together with CWORK(1:N) can be used to restore the Q factors from
!>          the initial pivoted QR factorization of A. See the description of U.
!> 
[in]LDA
!>          LDA is INTEGER.
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is DOUBLE PRECISION array of dimension N.
!>          The singular values of A, ordered so that S(i) >= S(i+1).
!> 
[out]U
!>          U is COMPLEX*16 array, dimension
!>          LDU x M if JOBU = 'A'; see the description of LDU. In this case,
!>          on exit, U contains the M left singular vectors.
!>          LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this
!>          case, U contains the leading N or the leading NUMRANK left singular vectors.
!>          LDU x N if JOBU = 'F' ; see the description of LDU. In this case U
!>          contains N x N unitary matrix that can be used to form the left
!>          singular vectors.
!>          If JOBU = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER.
!>          The leading dimension of the array U.
!>          If JOBU = 'A', 'S', 'U', 'R',  LDU >= max(1,M).
!>          If JOBU = 'F',                 LDU >= max(1,N).
!>          Otherwise,                     LDU >= 1.
!> 
[out]V
!>          V is COMPLEX*16 array, dimension
!>          LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' .
!>          If JOBV = 'A', or 'V',  V contains the N-by-N unitary matrix  V**H;
!>          If JOBV = 'R', V contains the first NUMRANK rows of V**H (the right
!>          singular vectors, stored rowwise, of the NUMRANK largest singular values).
!>          If JOBV = 'N' and JOBA = 'E', V is used as a workspace.
!>          If JOBV = 'N', and JOBA.NE.'E', V is not referenced.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V.
!>          If JOBV = 'A', 'V', 'R',  or JOBA = 'E', LDV >= max(1,N).
!>          Otherwise,                               LDV >= 1.
!> 
[out]NUMRANK
!>          NUMRANK is INTEGER
!>          NUMRANK is the numerical rank first determined after the rank
!>          revealing QR factorization, following the strategy specified by the
!>          value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK
!>          leading singular values and vectors are then requested in the call
!>          of CGESVD. The final value of NUMRANK might be further reduced if
!>          some singular values are computed as zeros.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (max(1, LIWORK)).
!>          On exit, IWORK(1:N) contains column pivoting permutation of the
!>          rank revealing QR factorization.
!>          If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence
!>          of row swaps used in row pivoting. These can be used to restore the
!>          left singular vectors in the case JOBU = 'F'.
!>
!>          If LIWORK, LCWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          IWORK(1) returns the minimal LIWORK.
!> 
[in]LIWORK
!>          LIWORK is INTEGER
!>          The dimension of the array IWORK.
!>          LIWORK >= N + M - 1,  if JOBP = 'P';
!>          LIWORK >= N           if JOBP = 'N'.
!>
!>          If LIWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the CWORK, IWORK, and RWORK arrays, and no error
!>          message related to LCWORK is issued by XERBLA.
!> 
[out]CWORK
!>          CWORK is COMPLEX*12 array, dimension (max(2, LCWORK)), used as a workspace.
!>          On exit, if, on entry, LCWORK.NE.-1, CWORK(1:N) contains parameters
!>          needed to recover the Q factor from the QR factorization computed by
!>          ZGEQP3.
!>
!>          If LIWORK, LCWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          CWORK(1) returns the optimal LCWORK, and
!>          CWORK(2) returns the minimal LCWORK.
!> 
[in,out]LCWORK
!>          LCWORK is INTEGER
!>          The dimension of the array CWORK. It is determined as follows:
!>          Let  LWQP3 = N+1,  LWCON = 2*N, and let
!>          LWUNQ = { MAX( N, 1 ),  if JOBU = 'R', 'S', or 'U'
!>          { MAX( M, 1 ),  if JOBU = 'A'
!>          LWSVD = MAX( 3*N, 1 )
!>          LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 3*(N/2), 1 ), LWUNLQ = MAX( N, 1 ),
!>          LWQRF = MAX( N/2, 1 ), LWUNQ2 = MAX( N, 1 )
!>          Then the minimal value of LCWORK is:
!>          = MAX( N + LWQP3, LWSVD )        if only the singular values are needed;
!>          = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed,
!>                                   and a scaled condition estimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD, LWUNQ ) if the singular values and the left
!>                                   singular vectors are requested;
!>          = N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ) if the singular values and the left
!>                                   singular vectors are requested, and also
!>                                   a scaled condition estimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD )        if the singular values and the right
!>                                   singular vectors are requested;
!>          = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right
!>                                   singular vectors are requested, and also
!>                                   a scaled condition etimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD, LWUNQ ) if the full SVD is requested with JOBV = 'R';
!>                                   independent of JOBR;
!>          = N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ) if the full SVD is requested,
!>                                   JOBV = 'R' and, also a scaled condition
!>                                   estimate requested; independent of JOBR;
!>          = MAX( N + MAX( LWQP3, LWSVD, LWUNQ ),
!>         N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWUNLQ, LWUNQ) ) if the
!>                         full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='N'
!>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ),
!>         N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWUNLQ, LWUNQ ) )
!>                         if the full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='N', and also a scaled condition number estimate
!>                         requested.
!>          = MAX( N + MAX( LWQP3, LWSVD, LWUNQ ),
!>         N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWUNQ2, LWUNQ ) ) if the
!>                         full SVD is requested with JOBV = 'A', 'V', and JOBR ='T'
!>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ),
!>         N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWUNQ2, LWUNQ ) )
!>                         if the full SVD is requested with JOBV = 'A', 'V' and
!>                         JOBR ='T', and also a scaled condition number estimate
!>                         requested.
!>          Finally, LCWORK must be at least two: LCWORK = MAX( 2, LCWORK ).
!>
!>          If LCWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the CWORK, IWORK, and RWORK arrays, and no error
!>          message related to LCWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is DOUBLE PRECISION array, dimension (max(1, LRWORK)).
!>          On exit,
!>          1. If JOBA = 'E', RWORK(1) contains an estimate of the condition
!>          number of column scaled A. If A = C * D where D is diagonal and C
!>          has unit columns in the Euclidean norm, then, assuming full column rank,
!>          N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1).
!>          Otherwise, RWORK(1) = -1.
!>          2. RWORK(2) contains the number of singular values computed as
!>          exact zeros in ZGESVD applied to the upper triangular or trapezoidal
!>          R (from the initial QR factorization). In case of early exit (no call to
!>          ZGESVD, such as in the case of zero matrix) RWORK(2) = -1.
!>
!>          If LIWORK, LCWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          RWORK(1) returns the minimal LRWORK.
!> 
[in]LRWORK
!>          LRWORK is INTEGER.
!>          The dimension of the array RWORK.
!>          If JOBP ='P', then LRWORK >= MAX(2, M, 5*N);
!>          Otherwise, LRWORK >= MAX(2, 5*N).
!>
!>          If LRWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the CWORK, IWORK, and RWORK arrays, and no error
!>          message related to LCWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if ZBDSQR did not converge, INFO specifies how many superdiagonals
!>          of an intermediate bidiagonal form B (computed in ZGESVD) did not
!>          converge to zero.
!> 
Further Details:
!>
!>   1. The data movement (matrix transpose) is coded using simple nested
!>   DO-loops because BLAS and LAPACK do not provide corresponding subroutines.
!>   Those DO-loops are easily identified in this source code - by the CONTINUE
!>   statements labeled with 11**. In an optimized version of this code, the
!>   nested DO loops should be replaced with calls to an optimized subroutine.
!>   2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause
!>   column norm overflow. This is the minial precaution and it is left to the
!>   SVD routine (CGESVD) to do its own preemptive scaling if potential over-
!>   or underflows are detected. To avoid repeated scanning of the array A,
!>   an optimal implementation would do all necessary scaling before calling
!>   CGESVD and the scaling in CGESVD can be switched off.
!>   3. Other comments related to code optimization are given in comments in the
!>   code, enlosed in [[double brackets]].
!> 
Bugs, examples and comments
!>  Please report all bugs and send interesting examples and/or comments to
!>  drmac@math.hr. Thank you.
!> 
References
!>  [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for
!>      Computing the SVD with High Accuracy. ACM Trans. Math. Softw.
!>      44(1): 11:1-11:30 (2017)
!>
!>  SIGMA library, xGESVDQ section updated February 2016.
!>  Developed and coded by Zlatko Drmac, Department of Mathematics
!>  University of Zagreb, Croatia, drmac@math.hr
!> 
Contributors:
!> Developed and coded by Zlatko Drmac, Department of Mathematics
!>  University of Zagreb, Croatia, drmac@math.hr
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 410 of file zgesvdq.f.

413* .. Scalar Arguments ..
414 IMPLICIT NONE
415 CHARACTER JOBA, JOBP, JOBR, JOBU, JOBV
416 INTEGER M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LCWORK, LRWORK,
417 $ INFO
418* ..
419* .. Array Arguments ..
420 COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( * )
421 DOUBLE PRECISION S( * ), RWORK( * )
422 INTEGER IWORK( * )
423*
424* =====================================================================
425*
426* .. Parameters ..
427 DOUBLE PRECISION ZERO, ONE
428 parameter( zero = 0.0d0, one = 1.0d0 )
429 COMPLEX*16 CZERO, CONE
430 parameter( czero = (0.0d0,0.0d0), cone = (1.0d0,0.0d0) )
431* ..
432* .. Local Scalars ..
433 INTEGER IERR, NR, N1, OPTRATIO, p, q
434 INTEGER LWCON, LWQP3, LWRK_ZGELQF, LWRK_ZGESVD, LWRK_ZGESVD2,
435 $ LWRK_ZGEQP3, LWRK_ZGEQRF, LWRK_ZUNMLQ, LWRK_ZUNMQR,
436 $ LWRK_ZUNMQR2, LWLQF, LWQRF, LWSVD, LWSVD2, LWUNQ,
437 $ LWUNQ2, LWUNLQ, MINWRK, MINWRK2, OPTWRK, OPTWRK2,
438 $ IMINWRK, RMINWRK
439 LOGICAL ACCLA, ACCLM, ACCLH, ASCALED, CONDA, DNTWU, DNTWV,
440 $ LQUERY, LSVC0, LSVEC, ROWPRM, RSVEC, RTRANS, WNTUA,
441 $ WNTUF, WNTUR, WNTUS, WNTVA, WNTVR
442 DOUBLE PRECISION BIG, EPSLN, RTMP, SCONDA, SFMIN
443 COMPLEX*16 CTMP
444* ..
445* .. Local Arrays
446 COMPLEX*16 CDUMMY(1)
447 DOUBLE PRECISION RDUMMY(1)
448* ..
449* .. External Subroutines (BLAS, LAPACK)
450 EXTERNAL zgelqf, zgeqp3, zgeqrf, zgesvd, zlacpy, zlapmt,
453* ..
454* .. External Functions (BLAS, LAPACK)
455 LOGICAL LSAME
456 INTEGER IDAMAX
457 DOUBLE PRECISION ZLANGE, DZNRM2, DLAMCH
458 EXTERNAL lsame, zlange, idamax, dznrm2, dlamch
459* ..
460* .. Intrinsic Functions ..
461 INTRINSIC abs, conjg, max, min, dble, sqrt
462* ..
463* .. Executable Statements ..
464*
465* Test the input arguments
466*
467 wntus = lsame( jobu, 'S' ) .OR. lsame( jobu, 'U' )
468 wntur = lsame( jobu, 'R' )
469 wntua = lsame( jobu, 'A' )
470 wntuf = lsame( jobu, 'F' )
471 lsvc0 = wntus .OR. wntur .OR. wntua
472 lsvec = lsvc0 .OR. wntuf
473 dntwu = lsame( jobu, 'N' )
474*
475 wntvr = lsame( jobv, 'R' )
476 wntva = lsame( jobv, 'A' ) .OR. lsame( jobv, 'V' )
477 rsvec = wntvr .OR. wntva
478 dntwv = lsame( jobv, 'N' )
479*
480 accla = lsame( joba, 'A' )
481 acclm = lsame( joba, 'M' )
482 conda = lsame( joba, 'E' )
483 acclh = lsame( joba, 'H' ) .OR. conda
484*
485 rowprm = lsame( jobp, 'P' )
486 rtrans = lsame( jobr, 'T' )
487*
488 IF ( rowprm ) THEN
489 iminwrk = max( 1, n + m - 1 )
490 rminwrk = max( 2, m, 5*n )
491 ELSE
492 iminwrk = max( 1, n )
493 rminwrk = max( 2, 5*n )
494 END IF
495 lquery = (liwork .EQ. -1 .OR. lcwork .EQ. -1 .OR. lrwork .EQ. -1)
496 info = 0
497 IF ( .NOT. ( accla .OR. acclm .OR. acclh ) ) THEN
498 info = -1
499 ELSE IF ( .NOT.( rowprm .OR. lsame( jobp, 'N' ) ) ) THEN
500 info = -2
501 ELSE IF ( .NOT.( rtrans .OR. lsame( jobr, 'N' ) ) ) THEN
502 info = -3
503 ELSE IF ( .NOT.( lsvec .OR. dntwu ) ) THEN
504 info = -4
505 ELSE IF ( wntur .AND. wntva ) THEN
506 info = -5
507 ELSE IF ( .NOT.( rsvec .OR. dntwv )) THEN
508 info = -5
509 ELSE IF ( m.LT.0 ) THEN
510 info = -6
511 ELSE IF ( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
512 info = -7
513 ELSE IF ( lda.LT.max( 1, m ) ) THEN
514 info = -9
515 ELSE IF ( ldu.LT.1 .OR. ( lsvc0 .AND. ldu.LT.m ) .OR.
516 $ ( wntuf .AND. ldu.LT.n ) ) THEN
517 info = -12
518 ELSE IF ( ldv.LT.1 .OR. ( rsvec .AND. ldv.LT.n ) .OR.
519 $ ( conda .AND. ldv.LT.n ) ) THEN
520 info = -14
521 ELSE IF ( liwork .LT. iminwrk .AND. .NOT. lquery ) THEN
522 info = -17
523 END IF
524*
525*
526 IF ( info .EQ. 0 ) THEN
527* .. compute the minimal and the optimal workspace lengths
528* [[The expressions for computing the minimal and the optimal
529* values of LCWORK are written with a lot of redundancy and
530* can be simplified. However, this detailed form is easier for
531* maintenance and modifications of the code.]]
532*
533* .. minimal workspace length for ZGEQP3 of an M x N matrix
534 lwqp3 = n+1
535* .. minimal workspace length for ZUNMQR to build left singular vectors
536 IF ( wntus .OR. wntur ) THEN
537 lwunq = max( n , 1 )
538 ELSE IF ( wntua ) THEN
539 lwunq = max( m , 1 )
540 END IF
541* .. minimal workspace length for ZPOCON of an N x N matrix
542 lwcon = 2 * n
543* .. ZGESVD of an N x N matrix
544 lwsvd = max( 3 * n, 1 )
545 IF ( lquery ) THEN
546 CALL zgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,
547 $ rdummy, ierr )
548 lwrk_zgeqp3 = int( cdummy(1) )
549 IF ( wntus .OR. wntur ) THEN
550 CALL zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
551 $ ldu, cdummy, -1, ierr )
552 lwrk_zunmqr = int( cdummy(1) )
553 ELSE IF ( wntua ) THEN
554 CALL zunmqr( 'L', 'N', m, m, n, a, lda, cdummy, u,
555 $ ldu, cdummy, -1, ierr )
556 lwrk_zunmqr = int( cdummy(1) )
557 ELSE
558 lwrk_zunmqr = 0
559 END IF
560 END IF
561 minwrk = 2
562 optwrk = 2
563 IF ( .NOT. (lsvec .OR. rsvec ) ) THEN
564* .. minimal and optimal sizes of the complex workspace if
565* only the singular values are requested
566 IF ( conda ) THEN
567 minwrk = max( n+lwqp3, lwcon, lwsvd )
568 ELSE
569 minwrk = max( n+lwqp3, lwsvd )
570 END IF
571 IF ( lquery ) THEN
572 CALL zgesvd( 'N', 'N', n, n, a, lda, s, u, ldu,
573 $ v, ldv, cdummy, -1, rdummy, ierr )
574 lwrk_zgesvd = int( cdummy(1) )
575 IF ( conda ) THEN
576 optwrk = max( n+lwrk_zgeqp3, n+lwcon, lwrk_zgesvd )
577 ELSE
578 optwrk = max( n+lwrk_zgeqp3, lwrk_zgesvd )
579 END IF
580 END IF
581 ELSE IF ( lsvec .AND. (.NOT.rsvec) ) THEN
582* .. minimal and optimal sizes of the complex workspace if the
583* singular values and the left singular vectors are requested
584 IF ( conda ) THEN
585 minwrk = n + max( lwqp3, lwcon, lwsvd, lwunq )
586 ELSE
587 minwrk = n + max( lwqp3, lwsvd, lwunq )
588 END IF
589 IF ( lquery ) THEN
590 IF ( rtrans ) THEN
591 CALL zgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,
592 $ v, ldv, cdummy, -1, rdummy, ierr )
593 ELSE
594 CALL zgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,
595 $ v, ldv, cdummy, -1, rdummy, ierr )
596 END IF
597 lwrk_zgesvd = int( cdummy(1) )
598 IF ( conda ) THEN
599 optwrk = n + max( lwrk_zgeqp3, lwcon, lwrk_zgesvd,
600 $ lwrk_zunmqr )
601 ELSE
602 optwrk = n + max( lwrk_zgeqp3, lwrk_zgesvd,
603 $ lwrk_zunmqr )
604 END IF
605 END IF
606 ELSE IF ( rsvec .AND. (.NOT.lsvec) ) THEN
607* .. minimal and optimal sizes of the complex workspace if the
608* singular values and the right singular vectors are requested
609 IF ( conda ) THEN
610 minwrk = n + max( lwqp3, lwcon, lwsvd )
611 ELSE
612 minwrk = n + max( lwqp3, lwsvd )
613 END IF
614 IF ( lquery ) THEN
615 IF ( rtrans ) THEN
616 CALL zgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,
617 $ v, ldv, cdummy, -1, rdummy, ierr )
618 ELSE
619 CALL zgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,
620 $ v, ldv, cdummy, -1, rdummy, ierr )
621 END IF
622 lwrk_zgesvd = int( cdummy(1) )
623 IF ( conda ) THEN
624 optwrk = n + max( lwrk_zgeqp3, lwcon, lwrk_zgesvd )
625 ELSE
626 optwrk = n + max( lwrk_zgeqp3, lwrk_zgesvd )
627 END IF
628 END IF
629 ELSE
630* .. minimal and optimal sizes of the complex workspace if the
631* full SVD is requested
632 IF ( rtrans ) THEN
633 minwrk = max( lwqp3, lwsvd, lwunq )
634 IF ( conda ) minwrk = max( minwrk, lwcon )
635 minwrk = minwrk + n
636 IF ( wntva ) THEN
637* .. minimal workspace length for N x N/2 ZGEQRF
638 lwqrf = max( n/2, 1 )
639* .. minimal workspace length for N/2 x N/2 ZGESVD
640 lwsvd2 = max( 3 * (n/2), 1 )
641 lwunq2 = max( n, 1 )
642 minwrk2 = max( lwqp3, n/2+lwqrf, n/2+lwsvd2,
643 $ n/2+lwunq2, lwunq )
644 IF ( conda ) minwrk2 = max( minwrk2, lwcon )
645 minwrk2 = n + minwrk2
646 minwrk = max( minwrk, minwrk2 )
647 END IF
648 ELSE
649 minwrk = max( lwqp3, lwsvd, lwunq )
650 IF ( conda ) minwrk = max( minwrk, lwcon )
651 minwrk = minwrk + n
652 IF ( wntva ) THEN
653* .. minimal workspace length for N/2 x N ZGELQF
654 lwlqf = max( n/2, 1 )
655 lwsvd2 = max( 3 * (n/2), 1 )
656 lwunlq = max( n , 1 )
657 minwrk2 = max( lwqp3, n/2+lwlqf, n/2+lwsvd2,
658 $ n/2+lwunlq, lwunq )
659 IF ( conda ) minwrk2 = max( minwrk2, lwcon )
660 minwrk2 = n + minwrk2
661 minwrk = max( minwrk, minwrk2 )
662 END IF
663 END IF
664 IF ( lquery ) THEN
665 IF ( rtrans ) THEN
666 CALL zgesvd( 'O', 'A', n, n, a, lda, s, u, ldu,
667 $ v, ldv, cdummy, -1, rdummy, ierr )
668 lwrk_zgesvd = int( cdummy(1) )
669 optwrk = max(lwrk_zgeqp3,lwrk_zgesvd,lwrk_zunmqr)
670 IF ( conda ) optwrk = max( optwrk, lwcon )
671 optwrk = n + optwrk
672 IF ( wntva ) THEN
673 CALL zgeqrf(n,n/2,u,ldu,cdummy,cdummy,-1,ierr)
674 lwrk_zgeqrf = int( cdummy(1) )
675 CALL zgesvd( 'S', 'O', n/2,n/2, v,ldv, s, u,ldu,
676 $ v, ldv, cdummy, -1, rdummy, ierr )
677 lwrk_zgesvd2 = int( cdummy(1) )
678 CALL zunmqr( 'R', 'C', n, n, n/2, u, ldu, cdummy,
679 $ v, ldv, cdummy, -1, ierr )
680 lwrk_zunmqr2 = int( cdummy(1) )
681 optwrk2 = max( lwrk_zgeqp3, n/2+lwrk_zgeqrf,
682 $ n/2+lwrk_zgesvd2, n/2+lwrk_zunmqr2 )
683 IF ( conda ) optwrk2 = max( optwrk2, lwcon )
684 optwrk2 = n + optwrk2
685 optwrk = max( optwrk, optwrk2 )
686 END IF
687 ELSE
688 CALL zgesvd( 'S', 'O', n, n, a, lda, s, u, ldu,
689 $ v, ldv, cdummy, -1, rdummy, ierr )
690 lwrk_zgesvd = int( cdummy(1) )
691 optwrk = max(lwrk_zgeqp3,lwrk_zgesvd,lwrk_zunmqr)
692 IF ( conda ) optwrk = max( optwrk, lwcon )
693 optwrk = n + optwrk
694 IF ( wntva ) THEN
695 CALL zgelqf(n/2,n,u,ldu,cdummy,cdummy,-1,ierr)
696 lwrk_zgelqf = int( cdummy(1) )
697 CALL zgesvd( 'S','O', n/2,n/2, v, ldv, s, u, ldu,
698 $ v, ldv, cdummy, -1, rdummy, ierr )
699 lwrk_zgesvd2 = int( cdummy(1) )
700 CALL zunmlq( 'R', 'N', n, n, n/2, u, ldu, cdummy,
701 $ v, ldv, cdummy,-1,ierr )
702 lwrk_zunmlq = int( cdummy(1) )
703 optwrk2 = max( lwrk_zgeqp3, n/2+lwrk_zgelqf,
704 $ n/2+lwrk_zgesvd2, n/2+lwrk_zunmlq )
705 IF ( conda ) optwrk2 = max( optwrk2, lwcon )
706 optwrk2 = n + optwrk2
707 optwrk = max( optwrk, optwrk2 )
708 END IF
709 END IF
710 END IF
711 END IF
712*
713 minwrk = max( 2, minwrk )
714 optwrk = max( 2, optwrk )
715 IF ( lcwork .LT. minwrk .AND. (.NOT.lquery) ) info = -19
716*
717 END IF
718*
719 IF (info .EQ. 0 .AND. lrwork .LT. rminwrk .AND. .NOT. lquery) THEN
720 info = -21
721 END IF
722 IF( info.NE.0 ) THEN
723 CALL xerbla( 'ZGESVDQ', -info )
724 RETURN
725 ELSE IF ( lquery ) THEN
726*
727* Return optimal workspace
728*
729 iwork(1) = iminwrk
730 cwork(1) = optwrk
731 cwork(2) = minwrk
732 rwork(1) = rminwrk
733 RETURN
734 END IF
735*
736* Quick return if the matrix is void.
737*
738 IF( ( m.EQ.0 ) .OR. ( n.EQ.0 ) ) THEN
739* .. all output is void.
740 RETURN
741 END IF
742*
743 big = dlamch('O')
744 ascaled = .false.
745 IF ( rowprm ) THEN
746* .. reordering the rows in decreasing sequence in the
747* ell-infinity norm - this enhances numerical robustness in
748* the case of differently scaled rows.
749 DO 1904 p = 1, m
750* RWORK(p) = ABS( A(p,IZAMAX(N,A(p,1),LDA)) )
751* [[ZLANGE will return NaN if an entry of the p-th row is Nan]]
752 rwork(p) = zlange( 'M', 1, n, a(p,1), lda, rdummy )
753* .. check for NaN's and Inf's
754 IF ( ( rwork(p) .NE. rwork(p) ) .OR.
755 $ ( (rwork(p)*zero) .NE. zero ) ) THEN
756 info = -8
757 CALL xerbla( 'ZGESVDQ', -info )
758 RETURN
759 END IF
760 1904 CONTINUE
761 DO 1952 p = 1, m - 1
762 q = idamax( m-p+1, rwork(p), 1 ) + p - 1
763 iwork(n+p) = q
764 IF ( p .NE. q ) THEN
765 rtmp = rwork(p)
766 rwork(p) = rwork(q)
767 rwork(q) = rtmp
768 END IF
769 1952 CONTINUE
770*
771 IF ( rwork(1) .EQ. zero ) THEN
772* Quick return: A is the M x N zero matrix.
773 numrank = 0
774 CALL dlaset( 'G', n, 1, zero, zero, s, n )
775 IF ( wntus ) CALL zlaset('G', m, n, czero, cone, u, ldu)
776 IF ( wntua ) CALL zlaset('G', m, m, czero, cone, u, ldu)
777 IF ( wntva ) CALL zlaset('G', n, n, czero, cone, v, ldv)
778 IF ( wntuf ) THEN
779 CALL zlaset( 'G', n, 1, czero, czero, cwork, n )
780 CALL zlaset( 'G', m, n, czero, cone, u, ldu )
781 END IF
782 DO 5001 p = 1, n
783 iwork(p) = p
784 5001 CONTINUE
785 IF ( rowprm ) THEN
786 DO 5002 p = n + 1, n + m - 1
787 iwork(p) = p - n
788 5002 CONTINUE
789 END IF
790 IF ( conda ) rwork(1) = -1
791 rwork(2) = -1
792 RETURN
793 END IF
794*
795 IF ( rwork(1) .GT. big / sqrt(dble(m)) ) THEN
796* .. to prevent overflow in the QR factorization, scale the
797* matrix by 1/sqrt(M) if too large entry detected
798 CALL zlascl('G',0,0,sqrt(dble(m)),one, m,n, a,lda, ierr)
799 ascaled = .true.
800 END IF
801 CALL zlaswp( n, a, lda, 1, m-1, iwork(n+1), 1 )
802 END IF
803*
804* .. At this stage, preemptive scaling is done only to avoid column
805* norms overflows during the QR factorization. The SVD procedure should
806* have its own scaling to save the singular values from overflows and
807* underflows. That depends on the SVD procedure.
808*
809 IF ( .NOT.rowprm ) THEN
810 rtmp = zlange( 'M', m, n, a, lda, rwork )
811 IF ( ( rtmp .NE. rtmp ) .OR.
812 $ ( (rtmp*zero) .NE. zero ) ) THEN
813 info = -8
814 CALL xerbla( 'ZGESVDQ', -info )
815 RETURN
816 END IF
817 IF ( rtmp .GT. big / sqrt(dble(m)) ) THEN
818* .. to prevent overflow in the QR factorization, scale the
819* matrix by 1/sqrt(M) if too large entry detected
820 CALL zlascl('G',0,0, sqrt(dble(m)),one, m,n, a,lda, ierr)
821 ascaled = .true.
822 END IF
823 END IF
824*
825* .. QR factorization with column pivoting
826*
827* A * P = Q * [ R ]
828* [ 0 ]
829*
830 DO 1963 p = 1, n
831* .. all columns are free columns
832 iwork(p) = 0
833 1963 CONTINUE
834 CALL zgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lcwork-n,
835 $ rwork, ierr )
836*
837* If the user requested accuracy level allows truncation in the
838* computed upper triangular factor, the matrix R is examined and,
839* if possible, replaced with its leading upper trapezoidal part.
840*
841 epsln = dlamch('E')
842 sfmin = dlamch('S')
843* SMALL = SFMIN / EPSLN
844 nr = n
845*
846 IF ( accla ) THEN
847*
848* Standard absolute error bound suffices. All sigma_i with
849* sigma_i < N*EPS*||A||_F are flushed to zero. This is an
850* aggressive enforcement of lower numerical rank by introducing a
851* backward error of the order of N*EPS*||A||_F.
852 nr = 1
853 rtmp = sqrt(dble(n))*epsln
854 DO 3001 p = 2, n
855 IF ( abs(a(p,p)) .LT. (rtmp*abs(a(1,1))) ) GO TO 3002
856 nr = nr + 1
857 3001 CONTINUE
858 3002 CONTINUE
859*
860 ELSEIF ( acclm ) THEN
861* .. similarly as above, only slightly more gentle (less aggressive).
862* Sudden drop on the diagonal of R is used as the criterion for being
863* close-to-rank-deficient. The threshold is set to EPSLN=DLAMCH('E').
864* [[This can be made more flexible by replacing this hard-coded value
865* with a user specified threshold.]] Also, the values that underflow
866* will be truncated.
867 nr = 1
868 DO 3401 p = 2, n
869 IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
870 $ ( abs(a(p,p)) .LT. sfmin ) ) GO TO 3402
871 nr = nr + 1
872 3401 CONTINUE
873 3402 CONTINUE
874*
875 ELSE
876* .. RRQR not authorized to determine numerical rank except in the
877* obvious case of zero pivots.
878* .. inspect R for exact zeros on the diagonal;
879* R(i,i)=0 => R(i:N,i:N)=0.
880 nr = 1
881 DO 3501 p = 2, n
882 IF ( abs(a(p,p)) .EQ. zero ) GO TO 3502
883 nr = nr + 1
884 3501 CONTINUE
885 3502 CONTINUE
886*
887 IF ( conda ) THEN
888* Estimate the scaled condition number of A. Use the fact that it is
889* the same as the scaled condition number of R.
890* .. V is used as workspace
891 CALL zlacpy( 'U', n, n, a, lda, v, ldv )
892* Only the leading NR x NR submatrix of the triangular factor
893* is considered. Only if NR=N will this give a reliable error
894* bound. However, even for NR < N, this can be used on an
895* expert level and obtain useful information in the sense of
896* perturbation theory.
897 DO 3053 p = 1, nr
898 rtmp = dznrm2( p, v(1,p), 1 )
899 CALL zdscal( p, one/rtmp, v(1,p), 1 )
900 3053 CONTINUE
901 IF ( .NOT. ( lsvec .OR. rsvec ) ) THEN
902 CALL zpocon( 'U', nr, v, ldv, one, rtmp,
903 $ cwork, rwork, ierr )
904 ELSE
905 CALL zpocon( 'U', nr, v, ldv, one, rtmp,
906 $ cwork(n+1), rwork, ierr )
907 END IF
908 sconda = one / sqrt(rtmp)
909* For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1),
910* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
911* See the reference [1] for more details.
912 END IF
913*
914 ENDIF
915*
916 IF ( wntur ) THEN
917 n1 = nr
918 ELSE IF ( wntus .OR. wntuf) THEN
919 n1 = n
920 ELSE IF ( wntua ) THEN
921 n1 = m
922 END IF
923*
924 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
925*.......................................................................
926* .. only the singular values are requested
927*.......................................................................
928 IF ( rtrans ) THEN
929*
930* .. compute the singular values of R**H = [A](1:NR,1:N)**H
931* .. set the lower triangle of [A] to [A](1:NR,1:N)**H and
932* the upper triangle of [A] to zero.
933 DO 1146 p = 1, min( n, nr )
934 a(p,p) = conjg(a(p,p))
935 DO 1147 q = p + 1, n
936 a(q,p) = conjg(a(p,q))
937 IF ( q .LE. nr ) a(p,q) = czero
938 1147 CONTINUE
939 1146 CONTINUE
940*
941 CALL zgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,
942 $ v, ldv, cwork, lcwork, rwork, info )
943*
944 ELSE
945*
946* .. compute the singular values of R = [A](1:NR,1:N)
947*
948 IF ( nr .GT. 1 )
949 $ CALL zlaset( 'L', nr-1,nr-1, czero,czero, a(2,1), lda )
950 CALL zgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,
951 $ v, ldv, cwork, lcwork, rwork, info )
952*
953 END IF
954*
955 ELSE IF ( lsvec .AND. ( .NOT. rsvec) ) THEN
956*.......................................................................
957* .. the singular values and the left singular vectors requested
958*.......................................................................""""""""
959 IF ( rtrans ) THEN
960* .. apply ZGESVD to R**H
961* .. copy R**H into [U] and overwrite [U] with the right singular
962* vectors of R
963 DO 1192 p = 1, nr
964 DO 1193 q = p, n
965 u(q,p) = conjg(a(p,q))
966 1193 CONTINUE
967 1192 CONTINUE
968 IF ( nr .GT. 1 )
969 $ CALL zlaset( 'U', nr-1,nr-1, czero,czero, u(1,2), ldu )
970* .. the left singular vectors not computed, the NR right singular
971* vectors overwrite [U](1:NR,1:NR) as conjugate transposed. These
972* will be pre-multiplied by Q to build the left singular vectors of A.
973 CALL zgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,
974 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
975*
976 DO 1119 p = 1, nr
977 u(p,p) = conjg(u(p,p))
978 DO 1120 q = p + 1, nr
979 ctmp = conjg(u(q,p))
980 u(q,p) = conjg(u(p,q))
981 u(p,q) = ctmp
982 1120 CONTINUE
983 1119 CONTINUE
984*
985 ELSE
986* .. apply ZGESVD to R
987* .. copy R into [U] and overwrite [U] with the left singular vectors
988 CALL zlacpy( 'U', nr, n, a, lda, u, ldu )
989 IF ( nr .GT. 1 )
990 $ CALL zlaset( 'L', nr-1, nr-1, czero, czero, u(2,1), ldu )
991* .. the right singular vectors not computed, the NR left singular
992* vectors overwrite [U](1:NR,1:NR)
993 CALL zgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,
994 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
995* .. now [U](1:NR,1:NR) contains the NR left singular vectors of
996* R. These will be pre-multiplied by Q to build the left singular
997* vectors of A.
998 END IF
999*
1000* .. assemble the left singular vector matrix U of dimensions
1001* (M x NR) or (M x N) or (M x M).
1002 IF ( ( nr .LT. m ) .AND. ( .NOT.wntuf ) ) THEN
1003 CALL zlaset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu)
1004 IF ( nr .LT. n1 ) THEN
1005 CALL zlaset( 'A',nr,n1-nr,czero,czero,u(1,nr+1), ldu )
1006 CALL zlaset( 'A',m-nr,n1-nr,czero,cone,
1007 $ u(nr+1,nr+1), ldu )
1008 END IF
1009 END IF
1010*
1011* The Q matrix from the first QRF is built into the left singular
1012* vectors matrix U.
1013*
1014 IF ( .NOT.wntuf )
1015 $ CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1016 $ ldu, cwork(n+1), lcwork-n, ierr )
1017 IF ( rowprm .AND. .NOT.wntuf )
1018 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -1 )
1019*
1020 ELSE IF ( rsvec .AND. ( .NOT. lsvec ) ) THEN
1021*.......................................................................
1022* .. the singular values and the right singular vectors requested
1023*.......................................................................
1024 IF ( rtrans ) THEN
1025* .. apply ZGESVD to R**H
1026* .. copy R**H into V and overwrite V with the left singular vectors
1027 DO 1165 p = 1, nr
1028 DO 1166 q = p, n
1029 v(q,p) = conjg(a(p,q))
1030 1166 CONTINUE
1031 1165 CONTINUE
1032 IF ( nr .GT. 1 )
1033 $ CALL zlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv )
1034* .. the left singular vectors of R**H overwrite V, the right singular
1035* vectors not computed
1036 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1037 CALL zgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,
1038 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
1039*
1040 DO 1121 p = 1, nr
1041 v(p,p) = conjg(v(p,p))
1042 DO 1122 q = p + 1, nr
1043 ctmp = conjg(v(q,p))
1044 v(q,p) = conjg(v(p,q))
1045 v(p,q) = ctmp
1046 1122 CONTINUE
1047 1121 CONTINUE
1048*
1049 IF ( nr .LT. n ) THEN
1050 DO 1103 p = 1, nr
1051 DO 1104 q = nr + 1, n
1052 v(p,q) = conjg(v(q,p))
1053 1104 CONTINUE
1054 1103 CONTINUE
1055 END IF
1056 CALL zlapmt( .false., nr, n, v, ldv, iwork )
1057 ELSE
1058* .. need all N right singular vectors and NR < N
1059* [!] This is simple implementation that augments [V](1:N,1:NR)
1060* by padding a zero block. In the case NR << N, a more efficient
1061* way is to first use the QR factorization. For more details
1062* how to implement this, see the " FULL SVD " branch.
1063 CALL zlaset('G', n, n-nr, czero, czero, v(1,nr+1), ldv)
1064 CALL zgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,
1065 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
1066*
1067 DO 1123 p = 1, n
1068 v(p,p) = conjg(v(p,p))
1069 DO 1124 q = p + 1, n
1070 ctmp = conjg(v(q,p))
1071 v(q,p) = conjg(v(p,q))
1072 v(p,q) = ctmp
1073 1124 CONTINUE
1074 1123 CONTINUE
1075 CALL zlapmt( .false., n, n, v, ldv, iwork )
1076 END IF
1077*
1078 ELSE
1079* .. aply ZGESVD to R
1080* .. copy R into V and overwrite V with the right singular vectors
1081 CALL zlacpy( 'U', nr, n, a, lda, v, ldv )
1082 IF ( nr .GT. 1 )
1083 $ CALL zlaset( 'L', nr-1, nr-1, czero, czero, v(2,1), ldv )
1084* .. the right singular vectors overwrite V, the NR left singular
1085* vectors stored in U(1:NR,1:NR)
1086 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1087 CALL zgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,
1088 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
1089 CALL zlapmt( .false., nr, n, v, ldv, iwork )
1090* .. now [V](1:NR,1:N) contains V(1:N,1:NR)**H
1091 ELSE
1092* .. need all N right singular vectors and NR < N
1093* [!] This is simple implementation that augments [V](1:NR,1:N)
1094* by padding a zero block. In the case NR << N, a more efficient
1095* way is to first use the LQ factorization. For more details
1096* how to implement this, see the " FULL SVD " branch.
1097 CALL zlaset('G', n-nr, n, czero,czero, v(nr+1,1), ldv)
1098 CALL zgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,
1099 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
1100 CALL zlapmt( .false., n, n, v, ldv, iwork )
1101 END IF
1102* .. now [V] contains the adjoint of the matrix of the right singular
1103* vectors of A.
1104 END IF
1105*
1106 ELSE
1107*.......................................................................
1108* .. FULL SVD requested
1109*.......................................................................
1110 IF ( rtrans ) THEN
1111*
1112* .. apply ZGESVD to R**H [[this option is left for R&D&T]]
1113*
1114 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1115* .. copy R**H into [V] and overwrite [V] with the left singular
1116* vectors of R**H
1117 DO 1168 p = 1, nr
1118 DO 1169 q = p, n
1119 v(q,p) = conjg(a(p,q))
1120 1169 CONTINUE
1121 1168 CONTINUE
1122 IF ( nr .GT. 1 )
1123 $ CALL zlaset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv )
1124*
1125* .. the left singular vectors of R**H overwrite [V], the NR right
1126* singular vectors of R**H stored in [U](1:NR,1:NR) as conjugate
1127* transposed
1128 CALL zgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,
1129 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
1130* .. assemble V
1131 DO 1115 p = 1, nr
1132 v(p,p) = conjg(v(p,p))
1133 DO 1116 q = p + 1, nr
1134 ctmp = conjg(v(q,p))
1135 v(q,p) = conjg(v(p,q))
1136 v(p,q) = ctmp
1137 1116 CONTINUE
1138 1115 CONTINUE
1139 IF ( nr .LT. n ) THEN
1140 DO 1101 p = 1, nr
1141 DO 1102 q = nr+1, n
1142 v(p,q) = conjg(v(q,p))
1143 1102 CONTINUE
1144 1101 CONTINUE
1145 END IF
1146 CALL zlapmt( .false., nr, n, v, ldv, iwork )
1147*
1148 DO 1117 p = 1, nr
1149 u(p,p) = conjg(u(p,p))
1150 DO 1118 q = p + 1, nr
1151 ctmp = conjg(u(q,p))
1152 u(q,p) = conjg(u(p,q))
1153 u(p,q) = ctmp
1154 1118 CONTINUE
1155 1117 CONTINUE
1156*
1157 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1158 CALL zlaset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu)
1159 IF ( nr .LT. n1 ) THEN
1160 CALL zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1161 CALL zlaset( 'A',m-nr,n1-nr,czero,cone,
1162 $ u(nr+1,nr+1), ldu )
1163 END IF
1164 END IF
1165*
1166 ELSE
1167* .. need all N right singular vectors and NR < N
1168* .. copy R**H into [V] and overwrite [V] with the left singular
1169* vectors of R**H
1170* [[The optimal ratio N/NR for using QRF instead of padding
1171* with zeros. Here hard coded to 2; it must be at least
1172* two due to work space constraints.]]
1173* OPTRATIO = ILAENV(6, 'ZGESVD', 'S' // 'O', NR,N,0,0)
1174* OPTRATIO = MAX( OPTRATIO, 2 )
1175 optratio = 2
1176 IF ( optratio*nr .GT. n ) THEN
1177 DO 1198 p = 1, nr
1178 DO 1199 q = p, n
1179 v(q,p) = conjg(a(p,q))
1180 1199 CONTINUE
1181 1198 CONTINUE
1182 IF ( nr .GT. 1 )
1183 $ CALL zlaset('U',nr-1,nr-1, czero,czero, v(1,2),ldv)
1184*
1185 CALL zlaset('A',n,n-nr,czero,czero,v(1,nr+1),ldv)
1186 CALL zgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,
1187 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
1188*
1189 DO 1113 p = 1, n
1190 v(p,p) = conjg(v(p,p))
1191 DO 1114 q = p + 1, n
1192 ctmp = conjg(v(q,p))
1193 v(q,p) = conjg(v(p,q))
1194 v(p,q) = ctmp
1195 1114 CONTINUE
1196 1113 CONTINUE
1197 CALL zlapmt( .false., n, n, v, ldv, iwork )
1198* .. assemble the left singular vector matrix U of dimensions
1199* (M x N1), i.e. (M x N) or (M x M).
1200*
1201 DO 1111 p = 1, n
1202 u(p,p) = conjg(u(p,p))
1203 DO 1112 q = p + 1, n
1204 ctmp = conjg(u(q,p))
1205 u(q,p) = conjg(u(p,q))
1206 u(p,q) = ctmp
1207 1112 CONTINUE
1208 1111 CONTINUE
1209*
1210 IF ( ( n .LT. m ) .AND. .NOT.(wntuf)) THEN
1211 CALL zlaset('A',m-n,n,czero,czero,u(n+1,1),ldu)
1212 IF ( n .LT. n1 ) THEN
1213 CALL zlaset('A',n,n1-n,czero,czero,u(1,n+1),ldu)
1214 CALL zlaset('A',m-n,n1-n,czero,cone,
1215 $ u(n+1,n+1), ldu )
1216 END IF
1217 END IF
1218 ELSE
1219* .. copy R**H into [U] and overwrite [U] with the right
1220* singular vectors of R
1221 DO 1196 p = 1, nr
1222 DO 1197 q = p, n
1223 u(q,nr+p) = conjg(a(p,q))
1224 1197 CONTINUE
1225 1196 CONTINUE
1226 IF ( nr .GT. 1 )
1227 $ CALL zlaset('U',nr-1,nr-1,czero,czero,u(1,nr+2),ldu)
1228 CALL zgeqrf( n, nr, u(1,nr+1), ldu, cwork(n+1),
1229 $ cwork(n+nr+1), lcwork-n-nr, ierr )
1230 DO 1143 p = 1, nr
1231 DO 1144 q = 1, n
1232 v(q,p) = conjg(u(p,nr+q))
1233 1144 CONTINUE
1234 1143 CONTINUE
1235 CALL zlaset('U',nr-1,nr-1,czero,czero,v(1,2),ldv)
1236 CALL zgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,
1237 $ v,ldv, cwork(n+nr+1),lcwork-n-nr,rwork, info )
1238 CALL zlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv)
1239 CALL zlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv)
1240 CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1241 CALL zunmqr('R','C', n, n, nr, u(1,nr+1), ldu,
1242 $ cwork(n+1),v,ldv,cwork(n+nr+1),lcwork-n-nr,ierr)
1243 CALL zlapmt( .false., n, n, v, ldv, iwork )
1244* .. assemble the left singular vector matrix U of dimensions
1245* (M x NR) or (M x N) or (M x M).
1246 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1247 CALL zlaset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu)
1248 IF ( nr .LT. n1 ) THEN
1249 CALL zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1250 CALL zlaset( 'A',m-nr,n1-nr,czero,cone,
1251 $ u(nr+1,nr+1),ldu)
1252 END IF
1253 END IF
1254 END IF
1255 END IF
1256*
1257 ELSE
1258*
1259* .. apply ZGESVD to R [[this is the recommended option]]
1260*
1261 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1262* .. copy R into [V] and overwrite V with the right singular vectors
1263 CALL zlacpy( 'U', nr, n, a, lda, v, ldv )
1264 IF ( nr .GT. 1 )
1265 $ CALL zlaset( 'L', nr-1,nr-1, czero,czero, v(2,1), ldv )
1266* .. the right singular vectors of R overwrite [V], the NR left
1267* singular vectors of R stored in [U](1:NR,1:NR)
1268 CALL zgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,
1269 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
1270 CALL zlapmt( .false., nr, n, v, ldv, iwork )
1271* .. now [V](1:NR,1:N) contains V(1:N,1:NR)**H
1272* .. assemble the left singular vector matrix U of dimensions
1273* (M x NR) or (M x N) or (M x M).
1274 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1275 CALL zlaset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu)
1276 IF ( nr .LT. n1 ) THEN
1277 CALL zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1278 CALL zlaset( 'A',m-nr,n1-nr,czero,cone,
1279 $ u(nr+1,nr+1), ldu )
1280 END IF
1281 END IF
1282*
1283 ELSE
1284* .. need all N right singular vectors and NR < N
1285* .. the requested number of the left singular vectors
1286* is then N1 (N or M)
1287* [[The optimal ratio N/NR for using LQ instead of padding
1288* with zeros. Here hard coded to 2; it must be at least
1289* two due to work space constraints.]]
1290* OPTRATIO = ILAENV(6, 'ZGESVD', 'S' // 'O', NR,N,0,0)
1291* OPTRATIO = MAX( OPTRATIO, 2 )
1292 optratio = 2
1293 IF ( optratio * nr .GT. n ) THEN
1294 CALL zlacpy( 'U', nr, n, a, lda, v, ldv )
1295 IF ( nr .GT. 1 )
1296 $ CALL zlaset('L', nr-1,nr-1, czero,czero, v(2,1),ldv)
1297* .. the right singular vectors of R overwrite [V], the NR left
1298* singular vectors of R stored in [U](1:NR,1:NR)
1299 CALL zlaset('A', n-nr,n, czero,czero, v(nr+1,1),ldv)
1300 CALL zgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,
1301 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
1302 CALL zlapmt( .false., n, n, v, ldv, iwork )
1303* .. now [V] contains the adjoint of the matrix of the right
1304* singular vectors of A. The leading N left singular vectors
1305* are in [U](1:N,1:N)
1306* .. assemble the left singular vector matrix U of dimensions
1307* (M x N1), i.e. (M x N) or (M x M).
1308 IF ( ( n .LT. m ) .AND. .NOT.(wntuf)) THEN
1309 CALL zlaset('A',m-n,n,czero,czero,u(n+1,1),ldu)
1310 IF ( n .LT. n1 ) THEN
1311 CALL zlaset('A',n,n1-n,czero,czero,u(1,n+1),ldu)
1312 CALL zlaset( 'A',m-n,n1-n,czero,cone,
1313 $ u(n+1,n+1), ldu )
1314 END IF
1315 END IF
1316 ELSE
1317 CALL zlacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu )
1318 IF ( nr .GT. 1 )
1319 $ CALL zlaset('L',nr-1,nr-1,czero,czero,u(nr+2,1),ldu)
1320 CALL zgelqf( nr, n, u(nr+1,1), ldu, cwork(n+1),
1321 $ cwork(n+nr+1), lcwork-n-nr, ierr )
1322 CALL zlacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv)
1323 IF ( nr .GT. 1 )
1324 $ CALL zlaset('U',nr-1,nr-1,czero,czero,v(1,2),ldv)
1325 CALL zgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,
1326 $ v, ldv, cwork(n+nr+1), lcwork-n-nr, rwork, info )
1327 CALL zlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv)
1328 CALL zlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv)
1329 CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1330 CALL zunmlq('R','N',n,n,nr,u(nr+1,1),ldu,cwork(n+1),
1331 $ v, ldv, cwork(n+nr+1),lcwork-n-nr,ierr)
1332 CALL zlapmt( .false., n, n, v, ldv, iwork )
1333* .. assemble the left singular vector matrix U of dimensions
1334* (M x NR) or (M x N) or (M x M).
1335 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1336 CALL zlaset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu)
1337 IF ( nr .LT. n1 ) THEN
1338 CALL zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1339 CALL zlaset( 'A',m-nr,n1-nr,czero,cone,
1340 $ u(nr+1,nr+1), ldu )
1341 END IF
1342 END IF
1343 END IF
1344 END IF
1345* .. end of the "R**H or R" branch
1346 END IF
1347*
1348* The Q matrix from the first QRF is built into the left singular
1349* vectors matrix U.
1350*
1351 IF ( .NOT. wntuf )
1352 $ CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1353 $ ldu, cwork(n+1), lcwork-n, ierr )
1354 IF ( rowprm .AND. .NOT.wntuf )
1355 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -1 )
1356*
1357* ... end of the "full SVD" branch
1358 END IF
1359*
1360* Check whether some singular values are returned as zeros, e.g.
1361* due to underflow, and update the numerical rank.
1362 p = nr
1363 DO 4001 q = p, 1, -1
1364 IF ( s(q) .GT. zero ) GO TO 4002
1365 nr = nr - 1
1366 4001 CONTINUE
1367 4002 CONTINUE
1368*
1369* .. if numerical rank deficiency is detected, the truncated
1370* singular values are set to zero.
1371 IF ( nr .LT. n ) CALL dlaset( 'G', n-nr,1, zero,zero, s(nr+1), n )
1372* .. undo scaling; this may cause overflow in the largest singular
1373* values.
1374 IF ( ascaled )
1375 $ CALL dlascl( 'G',0,0, one,sqrt(dble(m)), nr,1, s, n, ierr )
1376 IF ( conda ) rwork(1) = sconda
1377 rwork(2) = p - nr
1378* .. p-NR is the number of singular values that are computed as
1379* exact zeros in ZGESVD() applied to the (possibly truncated)
1380* full row rank triangular (trapezoidal) factor of A.
1381 numrank = nr
1382*
1383 RETURN
1384*
1385* End of ZGESVDQ
1386*
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
subroutine zgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, info)
ZGESVD computes the singular value decomposition (SVD) for GE matrices
Definition zgesvd.f:214
subroutine zlapmt(forwrd, m, n, x, ldx, k)
ZLAPMT performs a forward or backward permutation of the columns of a matrix.
Definition zlapmt.f:104

◆ zgesvdx()

subroutine zgesvdx ( character jobu,
character jobvt,
character range,
integer m,
integer n,
complex*16, dimension( lda, * ) a,
integer lda,
double precision vl,
double precision vu,
integer il,
integer iu,
integer ns,
double precision, dimension( * ) s,
complex*16, dimension( ldu, * ) u,
integer ldu,
complex*16, dimension( ldvt, * ) vt,
integer ldvt,
complex*16, dimension( * ) work,
integer lwork,
double precision, dimension( * ) rwork,
integer, dimension( * ) iwork,
integer info )

ZGESVDX computes the singular value decomposition (SVD) for GE matrices

Download ZGESVDX + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!>  ZGESVDX computes the singular value decomposition (SVD) of a complex
!>  M-by-N matrix A, optionally computing the left and/or right singular
!>  vectors. The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!>  where SIGMA is an M-by-N matrix which is zero except for its
!>  min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
!>  V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
!>  are the singular values of A; they are real and non-negative, and
!>  are returned in descending order.  The first min(m,n) columns of
!>  U and V are the left and right singular vectors of A.
!>
!>  ZGESVDX uses an eigenvalue problem for obtaining the SVD, which
!>  allows for the computation of a subset of singular values and
!>  vectors. See DBDSVDX for details.
!>
!>  Note that the routine returns V**T, not V.
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'V':  the first min(m,n) columns of U (the left singular
!>                  vectors) or as specified by RANGE are returned in
!>                  the array U;
!>          = 'N':  no columns of U (no left singular vectors) are
!>                  computed.
!> 
[in]JOBVT
!>          JOBVT is CHARACTER*1
!>           Specifies options for computing all or part of the matrix
!>           V**T:
!>           = 'V':  the first min(m,n) rows of V**T (the right singular
!>                   vectors) or as specified by RANGE are returned in
!>                   the array VT;
!>           = 'N':  no rows of V**T (no right singular vectors) are
!>                   computed.
!> 
[in]RANGE
!>          RANGE is CHARACTER*1
!>          = 'A': all singular values will be found.
!>          = 'V': all singular values in the half-open interval (VL,VU]
!>                 will be found.
!>          = 'I': the IL-th through IU-th singular values will be found.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is COMPLEX*16 array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit, the contents of A are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[in]VL
!>          VL is DOUBLE PRECISION
!>          If RANGE='V', the lower bound of the interval to
!>          be searched for singular values. VU > VL.
!>          Not referenced if RANGE = 'A' or 'I'.
!> 
[in]VU
!>          VU is DOUBLE PRECISION
!>          If RANGE='V', the upper bound of the interval to
!>          be searched for singular values. VU > VL.
!>          Not referenced if RANGE = 'A' or 'I'.
!> 
[in]IL
!>          IL is INTEGER
!>          If RANGE='I', the index of the
!>          smallest singular value to be returned.
!>          1 <= IL <= IU <= min(M,N), if min(M,N) > 0.
!>          Not referenced if RANGE = 'A' or 'V'.
!> 
[in]IU
!>          IU is INTEGER
!>          If RANGE='I', the index of the
!>          largest singular value to be returned.
!>          1 <= IL <= IU <= min(M,N), if min(M,N) > 0.
!>          Not referenced if RANGE = 'A' or 'V'.
!> 
[out]NS
!>          NS is INTEGER
!>          The total number of singular values found,
!>          0 <= NS <= min(M,N).
!>          If RANGE = 'A', NS = min(M,N); if RANGE = 'I', NS = IU-IL+1.
!> 
[out]S
!>          S is DOUBLE PRECISION array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is COMPLEX*16 array, dimension (LDU,UCOL)
!>          If JOBU = 'V', U contains columns of U (the left singular
!>          vectors, stored columnwise) as specified by RANGE; if
!>          JOBU = 'N', U is not referenced.
!>          Note: The user must ensure that UCOL >= NS; if RANGE = 'V',
!>          the exact value of NS is not known in advance and an upper
!>          bound must be used.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBU = 'V', LDU >= M.
!> 
[out]VT
!>          VT is COMPLEX*16 array, dimension (LDVT,N)
!>          If JOBVT = 'V', VT contains the rows of V**T (the right singular
!>          vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N',
!>          VT is not referenced.
!>          Note: The user must ensure that LDVT >= NS; if RANGE = 'V',
!>          the exact value of NS is not known in advance and an upper
!>          bound must be used.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1; if
!>          JOBVT = 'V', LDVT >= NS (see above).
!> 
[out]WORK
!>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see
!>          comments inside the code):
!>             - PATH 1  (M much larger than N)
!>             - PATH 1t (N much larger than M)
!>          LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths.
!>          For good performance, LWORK should generally be larger.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK))
!>          LRWORK >= MIN(M,N)*(MIN(M,N)*2+15*MIN(M,N)).
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (12*MIN(M,N))
!>          If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0,
!>          then IWORK contains the indices of the eigenvectors that failed
!>          to converge in DBDSVDX/DSTEVX.
!> 
[out]INFO
!>     INFO is INTEGER
!>           = 0:  successful exit
!>           < 0:  if INFO = -i, the i-th argument had an illegal value
!>           > 0:  if INFO = i, then i eigenvectors failed to converge
!>                 in DBDSVDX/DSTEVX.
!>                 if INFO = N*2 + 1, an internal error occurred in
!>                 DBDSVDX
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 267 of file zgesvdx.f.

270*
271* -- LAPACK driver routine --
272* -- LAPACK is a software package provided by Univ. of Tennessee, --
273* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
274*
275* .. Scalar Arguments ..
276 CHARACTER JOBU, JOBVT, RANGE
277 INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS
278 DOUBLE PRECISION VL, VU
279* ..
280* .. Array Arguments ..
281 INTEGER IWORK( * )
282 DOUBLE PRECISION S( * ), RWORK( * )
283 COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
284 $ WORK( * )
285* ..
286*
287* =====================================================================
288*
289* .. Parameters ..
290 COMPLEX*16 CZERO, CONE
291 parameter( czero = ( 0.0d0, 0.0d0 ),
292 $ cone = ( 1.0d0, 0.0d0 ) )
293 DOUBLE PRECISION ZERO, ONE
294 parameter( zero = 0.0d0, one = 1.0d0 )
295* ..
296* .. Local Scalars ..
297 CHARACTER JOBZ, RNGTGK
298 LOGICAL ALLS, INDS, LQUERY, VALS, WANTU, WANTVT
299 INTEGER I, ID, IE, IERR, ILQF, ILTGK, IQRF, ISCL,
300 $ ITAU, ITAUP, ITAUQ, ITEMP, ITEMPR, ITGKZ,
301 $ IUTGK, J, K, MAXWRK, MINMN, MINWRK, MNTHR
302 DOUBLE PRECISION ABSTOL, ANRM, BIGNUM, EPS, SMLNUM
303* ..
304* .. Local Arrays ..
305 DOUBLE PRECISION DUM( 1 )
306* ..
307* .. External Subroutines ..
308 EXTERNAL zgebrd, zgelqf, zgeqrf, zlascl, zlaset, zlacpy,
310* ..
311* .. External Functions ..
312 LOGICAL LSAME
313 INTEGER ILAENV
314 DOUBLE PRECISION DLAMCH, ZLANGE
315 EXTERNAL lsame, ilaenv, dlamch, zlange
316* ..
317* .. Intrinsic Functions ..
318 INTRINSIC max, min, sqrt
319* ..
320* .. Executable Statements ..
321*
322* Test the input arguments.
323*
324 ns = 0
325 info = 0
326 abstol = 2*dlamch('S')
327 lquery = ( lwork.EQ.-1 )
328 minmn = min( m, n )
329
330 wantu = lsame( jobu, 'V' )
331 wantvt = lsame( jobvt, 'V' )
332 IF( wantu .OR. wantvt ) THEN
333 jobz = 'V'
334 ELSE
335 jobz = 'N'
336 END IF
337 alls = lsame( range, 'A' )
338 vals = lsame( range, 'V' )
339 inds = lsame( range, 'I' )
340*
341 info = 0
342 IF( .NOT.lsame( jobu, 'V' ) .AND.
343 $ .NOT.lsame( jobu, 'N' ) ) THEN
344 info = -1
345 ELSE IF( .NOT.lsame( jobvt, 'V' ) .AND.
346 $ .NOT.lsame( jobvt, 'N' ) ) THEN
347 info = -2
348 ELSE IF( .NOT.( alls .OR. vals .OR. inds ) ) THEN
349 info = -3
350 ELSE IF( m.LT.0 ) THEN
351 info = -4
352 ELSE IF( n.LT.0 ) THEN
353 info = -5
354 ELSE IF( m.GT.lda ) THEN
355 info = -7
356 ELSE IF( minmn.GT.0 ) THEN
357 IF( vals ) THEN
358 IF( vl.LT.zero ) THEN
359 info = -8
360 ELSE IF( vu.LE.vl ) THEN
361 info = -9
362 END IF
363 ELSE IF( inds ) THEN
364 IF( il.LT.1 .OR. il.GT.max( 1, minmn ) ) THEN
365 info = -10
366 ELSE IF( iu.LT.min( minmn, il ) .OR. iu.GT.minmn ) THEN
367 info = -11
368 END IF
369 END IF
370 IF( info.EQ.0 ) THEN
371 IF( wantu .AND. ldu.LT.m ) THEN
372 info = -15
373 ELSE IF( wantvt ) THEN
374 IF( inds ) THEN
375 IF( ldvt.LT.iu-il+1 ) THEN
376 info = -17
377 END IF
378 ELSE IF( ldvt.LT.minmn ) THEN
379 info = -17
380 END IF
381 END IF
382 END IF
383 END IF
384*
385* Compute workspace
386* (Note: Comments in the code beginning "Workspace:" describe the
387* minimal amount of workspace needed at that point in the code,
388* as well as the preferred amount for good performance.
389* NB refers to the optimal block size for the immediately
390* following subroutine, as returned by ILAENV.)
391*
392 IF( info.EQ.0 ) THEN
393 minwrk = 1
394 maxwrk = 1
395 IF( minmn.GT.0 ) THEN
396 IF( m.GE.n ) THEN
397 mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
398 IF( m.GE.mnthr ) THEN
399*
400* Path 1 (M much larger than N)
401*
402 minwrk = n*(n+5)
403 maxwrk = n + n*ilaenv(1,'ZGEQRF',' ',m,n,-1,-1)
404 maxwrk = max(maxwrk,
405 $ n*n+2*n+2*n*ilaenv(1,'ZGEBRD',' ',n,n,-1,-1))
406 IF (wantu .OR. wantvt) THEN
407 maxwrk = max(maxwrk,
408 $ n*n+2*n+n*ilaenv(1,'ZUNMQR','LN',n,n,n,-1))
409 END IF
410 ELSE
411*
412* Path 2 (M at least N, but not much larger)
413*
414 minwrk = 3*n + m
415 maxwrk = 2*n + (m+n)*ilaenv(1,'ZGEBRD',' ',m,n,-1,-1)
416 IF (wantu .OR. wantvt) THEN
417 maxwrk = max(maxwrk,
418 $ 2*n+n*ilaenv(1,'ZUNMQR','LN',n,n,n,-1))
419 END IF
420 END IF
421 ELSE
422 mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
423 IF( n.GE.mnthr ) THEN
424*
425* Path 1t (N much larger than M)
426*
427 minwrk = m*(m+5)
428 maxwrk = m + m*ilaenv(1,'ZGELQF',' ',m,n,-1,-1)
429 maxwrk = max(maxwrk,
430 $ m*m+2*m+2*m*ilaenv(1,'ZGEBRD',' ',m,m,-1,-1))
431 IF (wantu .OR. wantvt) THEN
432 maxwrk = max(maxwrk,
433 $ m*m+2*m+m*ilaenv(1,'ZUNMQR','LN',m,m,m,-1))
434 END IF
435 ELSE
436*
437* Path 2t (N greater than M, but not much larger)
438*
439*
440 minwrk = 3*m + n
441 maxwrk = 2*m + (m+n)*ilaenv(1,'ZGEBRD',' ',m,n,-1,-1)
442 IF (wantu .OR. wantvt) THEN
443 maxwrk = max(maxwrk,
444 $ 2*m+m*ilaenv(1,'ZUNMQR','LN',m,m,m,-1))
445 END IF
446 END IF
447 END IF
448 END IF
449 maxwrk = max( maxwrk, minwrk )
450 work( 1 ) = dcmplx( dble( maxwrk ), zero )
451*
452 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
453 info = -19
454 END IF
455 END IF
456*
457 IF( info.NE.0 ) THEN
458 CALL xerbla( 'ZGESVDX', -info )
459 RETURN
460 ELSE IF( lquery ) THEN
461 RETURN
462 END IF
463*
464* Quick return if possible
465*
466 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
467 RETURN
468 END IF
469*
470* Set singular values indices accord to RANGE='A'.
471*
472 IF( alls ) THEN
473 rngtgk = 'I'
474 iltgk = 1
475 iutgk = min( m, n )
476 ELSE IF( inds ) THEN
477 rngtgk = 'I'
478 iltgk = il
479 iutgk = iu
480 ELSE
481 rngtgk = 'V'
482 iltgk = 0
483 iutgk = 0
484 END IF
485*
486* Get machine constants
487*
488 eps = dlamch( 'P' )
489 smlnum = sqrt( dlamch( 'S' ) ) / eps
490 bignum = one / smlnum
491*
492* Scale A if max element outside range [SMLNUM,BIGNUM]
493*
494 anrm = zlange( 'M', m, n, a, lda, dum )
495 iscl = 0
496 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
497 iscl = 1
498 CALL zlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, info )
499 ELSE IF( anrm.GT.bignum ) THEN
500 iscl = 1
501 CALL zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info )
502 END IF
503*
504 IF( m.GE.n ) THEN
505*
506* A has at least as many rows as columns. If A has sufficiently
507* more rows than columns, first reduce A using the QR
508* decomposition.
509*
510 IF( m.GE.mnthr ) THEN
511*
512* Path 1 (M much larger than N):
513* A = Q * R = Q * ( QB * B * PB**T )
514* = Q * ( QB * ( UB * S * VB**T ) * PB**T )
515* U = Q * QB * UB; V**T = VB**T * PB**T
516*
517* Compute A=Q*R
518* (Workspace: need 2*N, prefer N+N*NB)
519*
520 itau = 1
521 itemp = itau + n
522 CALL zgeqrf( m, n, a, lda, work( itau ), work( itemp ),
523 $ lwork-itemp+1, info )
524*
525* Copy R into WORK and bidiagonalize it:
526* (Workspace: need N*N+3*N, prefer N*N+N+2*N*NB)
527*
528 iqrf = itemp
529 itauq = itemp + n*n
530 itaup = itauq + n
531 itemp = itaup + n
532 id = 1
533 ie = id + n
534 itgkz = ie + n
535 CALL zlacpy( 'U', n, n, a, lda, work( iqrf ), n )
536 CALL zlaset( 'L', n-1, n-1, czero, czero,
537 $ work( iqrf+1 ), n )
538 CALL zgebrd( n, n, work( iqrf ), n, rwork( id ),
539 $ rwork( ie ), work( itauq ), work( itaup ),
540 $ work( itemp ), lwork-itemp+1, info )
541 itempr = itgkz + n*(n*2+1)
542*
543* Solve eigenvalue problem TGK*Z=Z*S.
544* (Workspace: need 2*N*N+14*N)
545*
546 CALL dbdsvdx( 'U', jobz, rngtgk, n, rwork( id ),
547 $ rwork( ie ), vl, vu, iltgk, iutgk, ns, s,
548 $ rwork( itgkz ), n*2, rwork( itempr ),
549 $ iwork, info)
550*
551* If needed, compute left singular vectors.
552*
553 IF( wantu ) THEN
554 k = itgkz
555 DO i = 1, ns
556 DO j = 1, n
557 u( j, i ) = dcmplx( rwork( k ), zero )
558 k = k + 1
559 END DO
560 k = k + n
561 END DO
562 CALL zlaset( 'A', m-n, ns, czero, czero, u( n+1,1 ), ldu)
563*
564* Call ZUNMBR to compute QB*UB.
565* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
566*
567 CALL zunmbr( 'Q', 'L', 'N', n, ns, n, work( iqrf ), n,
568 $ work( itauq ), u, ldu, work( itemp ),
569 $ lwork-itemp+1, info )
570*
571* Call ZUNMQR to compute Q*(QB*UB).
572* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
573*
574 CALL zunmqr( 'L', 'N', m, ns, n, a, lda,
575 $ work( itau ), u, ldu, work( itemp ),
576 $ lwork-itemp+1, info )
577 END IF
578*
579* If needed, compute right singular vectors.
580*
581 IF( wantvt) THEN
582 k = itgkz + n
583 DO i = 1, ns
584 DO j = 1, n
585 vt( i, j ) = dcmplx( rwork( k ), zero )
586 k = k + 1
587 END DO
588 k = k + n
589 END DO
590*
591* Call ZUNMBR to compute VB**T * PB**T
592* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
593*
594 CALL zunmbr( 'P', 'R', 'C', ns, n, n, work( iqrf ), n,
595 $ work( itaup ), vt, ldvt, work( itemp ),
596 $ lwork-itemp+1, info )
597 END IF
598 ELSE
599*
600* Path 2 (M at least N, but not much larger)
601* Reduce A to bidiagonal form without QR decomposition
602* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T
603* U = QB * UB; V**T = VB**T * PB**T
604*
605* Bidiagonalize A
606* (Workspace: need 2*N+M, prefer 2*N+(M+N)*NB)
607*
608 itauq = 1
609 itaup = itauq + n
610 itemp = itaup + n
611 id = 1
612 ie = id + n
613 itgkz = ie + n
614 CALL zgebrd( m, n, a, lda, rwork( id ), rwork( ie ),
615 $ work( itauq ), work( itaup ), work( itemp ),
616 $ lwork-itemp+1, info )
617 itempr = itgkz + n*(n*2+1)
618*
619* Solve eigenvalue problem TGK*Z=Z*S.
620* (Workspace: need 2*N*N+14*N)
621*
622 CALL dbdsvdx( 'U', jobz, rngtgk, n, rwork( id ),
623 $ rwork( ie ), vl, vu, iltgk, iutgk, ns, s,
624 $ rwork( itgkz ), n*2, rwork( itempr ),
625 $ iwork, info)
626*
627* If needed, compute left singular vectors.
628*
629 IF( wantu ) THEN
630 k = itgkz
631 DO i = 1, ns
632 DO j = 1, n
633 u( j, i ) = dcmplx( rwork( k ), zero )
634 k = k + 1
635 END DO
636 k = k + n
637 END DO
638 CALL zlaset( 'A', m-n, ns, czero, czero, u( n+1,1 ), ldu)
639*
640* Call ZUNMBR to compute QB*UB.
641* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
642*
643 CALL zunmbr( 'Q', 'L', 'N', m, ns, n, a, lda,
644 $ work( itauq ), u, ldu, work( itemp ),
645 $ lwork-itemp+1, ierr )
646 END IF
647*
648* If needed, compute right singular vectors.
649*
650 IF( wantvt) THEN
651 k = itgkz + n
652 DO i = 1, ns
653 DO j = 1, n
654 vt( i, j ) = dcmplx( rwork( k ), zero )
655 k = k + 1
656 END DO
657 k = k + n
658 END DO
659*
660* Call ZUNMBR to compute VB**T * PB**T
661* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
662*
663 CALL zunmbr( 'P', 'R', 'C', ns, n, n, a, lda,
664 $ work( itaup ), vt, ldvt, work( itemp ),
665 $ lwork-itemp+1, ierr )
666 END IF
667 END IF
668 ELSE
669*
670* A has more columns than rows. If A has sufficiently more
671* columns than rows, first reduce A using the LQ decomposition.
672*
673 IF( n.GE.mnthr ) THEN
674*
675* Path 1t (N much larger than M):
676* A = L * Q = ( QB * B * PB**T ) * Q
677* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q
678* U = QB * UB ; V**T = VB**T * PB**T * Q
679*
680* Compute A=L*Q
681* (Workspace: need 2*M, prefer M+M*NB)
682*
683 itau = 1
684 itemp = itau + m
685 CALL zgelqf( m, n, a, lda, work( itau ), work( itemp ),
686 $ lwork-itemp+1, info )
687
688* Copy L into WORK and bidiagonalize it:
689* (Workspace in WORK( ITEMP ): need M*M+3*M, prefer M*M+M+2*M*NB)
690*
691 ilqf = itemp
692 itauq = ilqf + m*m
693 itaup = itauq + m
694 itemp = itaup + m
695 id = 1
696 ie = id + m
697 itgkz = ie + m
698 CALL zlacpy( 'L', m, m, a, lda, work( ilqf ), m )
699 CALL zlaset( 'U', m-1, m-1, czero, czero,
700 $ work( ilqf+m ), m )
701 CALL zgebrd( m, m, work( ilqf ), m, rwork( id ),
702 $ rwork( ie ), work( itauq ), work( itaup ),
703 $ work( itemp ), lwork-itemp+1, info )
704 itempr = itgkz + m*(m*2+1)
705*
706* Solve eigenvalue problem TGK*Z=Z*S.
707* (Workspace: need 2*M*M+14*M)
708*
709 CALL dbdsvdx( 'U', jobz, rngtgk, m, rwork( id ),
710 $ rwork( ie ), vl, vu, iltgk, iutgk, ns, s,
711 $ rwork( itgkz ), m*2, rwork( itempr ),
712 $ iwork, info)
713*
714* If needed, compute left singular vectors.
715*
716 IF( wantu ) THEN
717 k = itgkz
718 DO i = 1, ns
719 DO j = 1, m
720 u( j, i ) = dcmplx( rwork( k ), zero )
721 k = k + 1
722 END DO
723 k = k + m
724 END DO
725*
726* Call ZUNMBR to compute QB*UB.
727* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
728*
729 CALL zunmbr( 'Q', 'L', 'N', m, ns, m, work( ilqf ), m,
730 $ work( itauq ), u, ldu, work( itemp ),
731 $ lwork-itemp+1, info )
732 END IF
733*
734* If needed, compute right singular vectors.
735*
736 IF( wantvt) THEN
737 k = itgkz + m
738 DO i = 1, ns
739 DO j = 1, m
740 vt( i, j ) = dcmplx( rwork( k ), zero )
741 k = k + 1
742 END DO
743 k = k + m
744 END DO
745 CALL zlaset( 'A', ns, n-m, czero, czero,
746 $ vt( 1,m+1 ), ldvt )
747*
748* Call ZUNMBR to compute (VB**T)*(PB**T)
749* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
750*
751 CALL zunmbr( 'P', 'R', 'C', ns, m, m, work( ilqf ), m,
752 $ work( itaup ), vt, ldvt, work( itemp ),
753 $ lwork-itemp+1, info )
754*
755* Call ZUNMLQ to compute ((VB**T)*(PB**T))*Q.
756* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
757*
758 CALL zunmlq( 'R', 'N', ns, n, m, a, lda,
759 $ work( itau ), vt, ldvt, work( itemp ),
760 $ lwork-itemp+1, info )
761 END IF
762 ELSE
763*
764* Path 2t (N greater than M, but not much larger)
765* Reduce to bidiagonal form without LQ decomposition
766* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T
767* U = QB * UB; V**T = VB**T * PB**T
768*
769* Bidiagonalize A
770* (Workspace: need 2*M+N, prefer 2*M+(M+N)*NB)
771*
772 itauq = 1
773 itaup = itauq + m
774 itemp = itaup + m
775 id = 1
776 ie = id + m
777 itgkz = ie + m
778 CALL zgebrd( m, n, a, lda, rwork( id ), rwork( ie ),
779 $ work( itauq ), work( itaup ), work( itemp ),
780 $ lwork-itemp+1, info )
781 itempr = itgkz + m*(m*2+1)
782*
783* Solve eigenvalue problem TGK*Z=Z*S.
784* (Workspace: need 2*M*M+14*M)
785*
786 CALL dbdsvdx( 'L', jobz, rngtgk, m, rwork( id ),
787 $ rwork( ie ), vl, vu, iltgk, iutgk, ns, s,
788 $ rwork( itgkz ), m*2, rwork( itempr ),
789 $ iwork, info)
790*
791* If needed, compute left singular vectors.
792*
793 IF( wantu ) THEN
794 k = itgkz
795 DO i = 1, ns
796 DO j = 1, m
797 u( j, i ) = dcmplx( rwork( k ), zero )
798 k = k + 1
799 END DO
800 k = k + m
801 END DO
802*
803* Call ZUNMBR to compute QB*UB.
804* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
805*
806 CALL zunmbr( 'Q', 'L', 'N', m, ns, n, a, lda,
807 $ work( itauq ), u, ldu, work( itemp ),
808 $ lwork-itemp+1, info )
809 END IF
810*
811* If needed, compute right singular vectors.
812*
813 IF( wantvt) THEN
814 k = itgkz + m
815 DO i = 1, ns
816 DO j = 1, m
817 vt( i, j ) = dcmplx( rwork( k ), zero )
818 k = k + 1
819 END DO
820 k = k + m
821 END DO
822 CALL zlaset( 'A', ns, n-m, czero, czero,
823 $ vt( 1,m+1 ), ldvt )
824*
825* Call ZUNMBR to compute VB**T * PB**T
826* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
827*
828 CALL zunmbr( 'P', 'R', 'C', ns, n, m, a, lda,
829 $ work( itaup ), vt, ldvt, work( itemp ),
830 $ lwork-itemp+1, info )
831 END IF
832 END IF
833 END IF
834*
835* Undo scaling if necessary
836*
837 IF( iscl.EQ.1 ) THEN
838 IF( anrm.GT.bignum )
839 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1,
840 $ s, minmn, info )
841 IF( anrm.LT.smlnum )
842 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1,
843 $ s, minmn, info )
844 END IF
845*
846* Return optimal workspace in WORK(1)
847*
848 work( 1 ) = dcmplx( dble( maxwrk ), zero )
849*
850 RETURN
851*
852* End of ZGESVDX
853*
subroutine dbdsvdx(uplo, jobz, range, n, d, e, vl, vu, il, iu, ns, s, z, ldz, work, iwork, info)
DBDSVDX
Definition dbdsvdx.f:226
initmumps id

◆ zggsvd3()

subroutine zggsvd3 ( character jobu,
character jobv,
character jobq,
integer m,
integer n,
integer p,
integer k,
integer l,
complex*16, dimension( lda, * ) a,
integer lda,
complex*16, dimension( ldb, * ) b,
integer ldb,
double precision, dimension( * ) alpha,
double precision, dimension( * ) beta,
complex*16, dimension( ldu, * ) u,
integer ldu,
complex*16, dimension( ldv, * ) v,
integer ldv,
complex*16, dimension( ldq, * ) q,
integer ldq,
complex*16, dimension( * ) work,
integer lwork,
double precision, dimension( * ) rwork,
integer, dimension( * ) iwork,
integer info )

ZGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices

Download ZGGSVD3 + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> ZGGSVD3 computes the generalized singular value decomposition (GSVD)
!> of an M-by-N complex matrix A and P-by-N complex matrix B:
!>
!>       U**H*A*Q = D1*( 0 R ),    V**H*B*Q = D2*( 0 R )
!>
!> where U, V and Q are unitary matrices.
!> Let K+L = the effective numerical rank of the
!> matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper
!> triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) 
!> matrices and of the following structures, respectively:
!>
!> If M-K-L >= 0,
!>
!>                     K  L
!>        D1 =     K ( I  0 )
!>                 L ( 0  C )
!>             M-K-L ( 0  0 )
!>
!>                   K  L
!>        D2 =   L ( 0  S )
!>             P-L ( 0  0 )
!>
!>                 N-K-L  K    L
!>   ( 0 R ) = K (  0   R11  R12 )
!>             L (  0    0   R22 )
!> where
!>
!>   C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
!>   S = diag( BETA(K+1),  ... , BETA(K+L) ),
!>   C**2 + S**2 = I.
!>
!>   R is stored in A(1:K+L,N-K-L+1:N) on exit.
!>
!> If M-K-L < 0,
!>
!>                   K M-K K+L-M
!>        D1 =   K ( I  0    0   )
!>             M-K ( 0  C    0   )
!>
!>                     K M-K K+L-M
!>        D2 =   M-K ( 0  S    0  )
!>             K+L-M ( 0  0    I  )
!>               P-L ( 0  0    0  )
!>
!>                    N-K-L  K   M-K  K+L-M
!>   ( 0 R ) =     K ( 0    R11  R12  R13  )
!>               M-K ( 0     0   R22  R23  )
!>             K+L-M ( 0     0    0   R33  )
!>
!> where
!>
!>   C = diag( ALPHA(K+1), ... , ALPHA(M) ),
!>   S = diag( BETA(K+1),  ... , BETA(M) ),
!>   C**2 + S**2 = I.
!>
!>   (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored
!>   ( 0  R22 R23 )
!>   in B(M-K+1:L,N+M-K-L+1:N) on exit.
!>
!> The routine computes C, S, R, and optionally the unitary
!> transformation matrices U, V and Q.
!>
!> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of
!> A and B implicitly gives the SVD of A*inv(B):
!>                      A*inv(B) = U*(D1*inv(D2))*V**H.
!> If ( A**H,B**H)**H has orthonormal columns, then the GSVD of A and B is also
!> equal to the CS decomposition of A and B. Furthermore, the GSVD can
!> be used to derive the solution of the eigenvalue problem:
!>                      A**H*A x = lambda* B**H*B x.
!> In some literature, the GSVD of A and B is presented in the form
!>                  U**H*A*X = ( 0 D1 ),   V**H*B*X = ( 0 D2 )
!> where U and V are orthogonal and X is nonsingular, and D1 and D2 are
!> ``diagonal''.  The former GSVD form can be converted to the latter
!> form by taking the nonsingular matrix X as
!>
!>                       X = Q*(  I   0    )
!>                             (  0 inv(R) )
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          = 'U':  Unitary matrix U is computed;
!>          = 'N':  U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>          = 'V':  Unitary matrix V is computed;
!>          = 'N':  V is not computed.
!> 
[in]JOBQ
!>          JOBQ is CHARACTER*1
!>          = 'Q':  Unitary matrix Q is computed;
!>          = 'N':  Q is not computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the matrices A and B.  N >= 0.
!> 
[in]P
!>          P is INTEGER
!>          The number of rows of the matrix B.  P >= 0.
!> 
[out]K
!>          K is INTEGER
!> 
[out]L
!>          L is INTEGER
!>
!>          On exit, K and L specify the dimension of the subblocks
!>          described in Purpose.
!>          K + L = effective numerical rank of (A**H,B**H)**H.
!> 
[in,out]A
!>          A is COMPLEX*16 array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit, A contains the triangular matrix R, or part of R.
!>          See Purpose for details.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A. LDA >= max(1,M).
!> 
[in,out]B
!>          B is COMPLEX*16 array, dimension (LDB,N)
!>          On entry, the P-by-N matrix B.
!>          On exit, B contains part of the triangular matrix R if
!>          M-K-L < 0.  See Purpose for details.
!> 
[in]LDB
!>          LDB is INTEGER
!>          The leading dimension of the array B. LDB >= max(1,P).
!> 
[out]ALPHA
!>          ALPHA is DOUBLE PRECISION array, dimension (N)
!> 
[out]BETA
!>          BETA is DOUBLE PRECISION array, dimension (N)
!>
!>          On exit, ALPHA and BETA contain the generalized singular
!>          value pairs of A and B;
!>            ALPHA(1:K) = 1,
!>            BETA(1:K)  = 0,
!>          and if M-K-L >= 0,
!>            ALPHA(K+1:K+L) = C,
!>            BETA(K+1:K+L)  = S,
!>          or if M-K-L < 0,
!>            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0
!>            BETA(K+1:M) =S, BETA(M+1:K+L) =1
!>          and
!>            ALPHA(K+L+1:N) = 0
!>            BETA(K+L+1:N)  = 0
!> 
[out]U
!>          U is COMPLEX*16 array, dimension (LDU,M)
!>          If JOBU = 'U', U contains the M-by-M unitary matrix U.
!>          If JOBU = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U. LDU >= max(1,M) if
!>          JOBU = 'U'; LDU >= 1 otherwise.
!> 
[out]V
!>          V is COMPLEX*16 array, dimension (LDV,P)
!>          If JOBV = 'V', V contains the P-by-P unitary matrix V.
!>          If JOBV = 'N', V is not referenced.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V. LDV >= max(1,P) if
!>          JOBV = 'V'; LDV >= 1 otherwise.
!> 
[out]Q
!>          Q is COMPLEX*16 array, dimension (LDQ,N)
!>          If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q.
!>          If JOBQ = 'N', Q is not referenced.
!> 
[in]LDQ
!>          LDQ is INTEGER
!>          The leading dimension of the array Q. LDQ >= max(1,N) if
!>          JOBQ = 'Q'; LDQ >= 1 otherwise.
!> 
[out]WORK
!>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is DOUBLE PRECISION array, dimension (2*N)
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (N)
!>          On exit, IWORK stores the sorting information. More
!>          precisely, the following loop will sort ALPHA
!>             for I = K+1, min(M,K+L)
!>                 swap ALPHA(I) and ALPHA(IWORK(I))
!>             endfor
!>          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N).
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if INFO = 1, the Jacobi-type procedure failed to
!>                converge.  For further details, see subroutine ZTGSJA.
!> 
Internal Parameters:
!>  TOLA    DOUBLE PRECISION
!>  TOLB    DOUBLE PRECISION
!>          TOLA and TOLB are the thresholds to determine the effective
!>          rank of (A**H,B**H)**H. Generally, they are set to
!>                   TOLA = MAX(M,N)*norm(A)*MACHEPS,
!>                   TOLB = MAX(P,N)*norm(B)*MACHEPS.
!>          The size of TOLA and TOLB may affect the size of backward
!>          errors of the decomposition.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA
Further Details:
ZGGSVD3 replaces the deprecated subroutine ZGGSVD.

Definition at line 350 of file zggsvd3.f.

353*
354* -- LAPACK driver routine --
355* -- LAPACK is a software package provided by Univ. of Tennessee, --
356* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
357*
358* .. Scalar Arguments ..
359 CHARACTER JOBQ, JOBU, JOBV
360 INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P,
361 $ LWORK
362* ..
363* .. Array Arguments ..
364 INTEGER IWORK( * )
365 DOUBLE PRECISION ALPHA( * ), BETA( * ), RWORK( * )
366 COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
367 $ U( LDU, * ), V( LDV, * ), WORK( * )
368* ..
369*
370* =====================================================================
371*
372* .. Local Scalars ..
373 LOGICAL WANTQ, WANTU, WANTV, LQUERY
374 INTEGER I, IBND, ISUB, J, NCYCLE, LWKOPT
375 DOUBLE PRECISION ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL
376* ..
377* .. External Functions ..
378 LOGICAL LSAME
379 DOUBLE PRECISION DLAMCH, ZLANGE
380 EXTERNAL lsame, dlamch, zlange
381* ..
382* .. External Subroutines ..
383 EXTERNAL dcopy, xerbla, zggsvp3, ztgsja
384* ..
385* .. Intrinsic Functions ..
386 INTRINSIC max, min
387* ..
388* .. Executable Statements ..
389*
390* Decode and test the input parameters
391*
392 wantu = lsame( jobu, 'U' )
393 wantv = lsame( jobv, 'V' )
394 wantq = lsame( jobq, 'Q' )
395 lquery = ( lwork.EQ.-1 )
396 lwkopt = 1
397*
398* Test the input arguments
399*
400 info = 0
401 IF( .NOT.( wantu .OR. lsame( jobu, 'N' ) ) ) THEN
402 info = -1
403 ELSE IF( .NOT.( wantv .OR. lsame( jobv, 'N' ) ) ) THEN
404 info = -2
405 ELSE IF( .NOT.( wantq .OR. lsame( jobq, 'N' ) ) ) THEN
406 info = -3
407 ELSE IF( m.LT.0 ) THEN
408 info = -4
409 ELSE IF( n.LT.0 ) THEN
410 info = -5
411 ELSE IF( p.LT.0 ) THEN
412 info = -6
413 ELSE IF( lda.LT.max( 1, m ) ) THEN
414 info = -10
415 ELSE IF( ldb.LT.max( 1, p ) ) THEN
416 info = -12
417 ELSE IF( ldu.LT.1 .OR. ( wantu .AND. ldu.LT.m ) ) THEN
418 info = -16
419 ELSE IF( ldv.LT.1 .OR. ( wantv .AND. ldv.LT.p ) ) THEN
420 info = -18
421 ELSE IF( ldq.LT.1 .OR. ( wantq .AND. ldq.LT.n ) ) THEN
422 info = -20
423 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
424 info = -24
425 END IF
426*
427* Compute workspace
428*
429 IF( info.EQ.0 ) THEN
430 CALL zggsvp3( jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola,
431 $ tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, rwork,
432 $ work, work, -1, info )
433 lwkopt = n + int( work( 1 ) )
434 lwkopt = max( 2*n, lwkopt )
435 lwkopt = max( 1, lwkopt )
436 work( 1 ) = dcmplx( lwkopt )
437 END IF
438*
439 IF( info.NE.0 ) THEN
440 CALL xerbla( 'ZGGSVD3', -info )
441 RETURN
442 END IF
443 IF( lquery ) THEN
444 RETURN
445 ENDIF
446*
447* Compute the Frobenius norm of matrices A and B
448*
449 anorm = zlange( '1', m, n, a, lda, rwork )
450 bnorm = zlange( '1', p, n, b, ldb, rwork )
451*
452* Get machine precision and set up threshold for determining
453* the effective numerical rank of the matrices A and B.
454*
455 ulp = dlamch( 'Precision' )
456 unfl = dlamch( 'Safe Minimum' )
457 tola = max( m, n )*max( anorm, unfl )*ulp
458 tolb = max( p, n )*max( bnorm, unfl )*ulp
459*
460 CALL zggsvp3( jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola,
461 $ tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, rwork,
462 $ work, work( n+1 ), lwork-n, info )
463*
464* Compute the GSVD of two upper "triangular" matrices
465*
466 CALL ztgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b, ldb,
467 $ tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq,
468 $ work, ncycle, info )
469*
470* Sort the singular values and store the pivot indices in IWORK
471* Copy ALPHA to RWORK, then sort ALPHA in RWORK
472*
473 CALL dcopy( n, alpha, 1, rwork, 1 )
474 ibnd = min( l, m-k )
475 DO 20 i = 1, ibnd
476*
477* Scan for largest ALPHA(K+I)
478*
479 isub = i
480 smax = rwork( k+i )
481 DO 10 j = i + 1, ibnd
482 temp = rwork( k+j )
483 IF( temp.GT.smax ) THEN
484 isub = j
485 smax = temp
486 END IF
487 10 CONTINUE
488 IF( isub.NE.i ) THEN
489 rwork( k+isub ) = rwork( k+i )
490 rwork( k+i ) = smax
491 iwork( k+i ) = k + isub
492 ELSE
493 iwork( k+i ) = k + i
494 END IF
495 20 CONTINUE
496*
497 work( 1 ) = dcmplx( lwkopt )
498 RETURN
499*
500* End of ZGGSVD3
501*
#define alpha
Definition eval.h:35
subroutine zggsvp3(jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola, tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, rwork, tau, work, lwork, info)
ZGGSVP3
Definition zggsvp3.f:278
subroutine ztgsja(jobu, jobv, jobq, m, p, n, k, l, a, lda, b, ldb, tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq, work, ncycle, info)
ZTGSJA
Definition ztgsja.f:379
subroutine dcopy(n, dx, incx, dy, incy)
DCOPY
Definition dcopy.f:82