OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches

Functions

subroutine cgejsv (joba, jobu, jobv, jobr, jobt, jobp, m, n, a, lda, sva, u, ldu, v, ldv, cwork, lwork, rwork, lrwork, iwork, info)
 CGEJSV
subroutine cgesdd (jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, iwork, info)
 CGESDD
subroutine cgesvd (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, info)
  CGESVD computes the singular value decomposition (SVD) for GE matrices
subroutine cgesvdq (joba, jobp, jobr, jobu, jobv, m, n, a, lda, s, u, ldu, v, ldv, numrank, iwork, liwork, cwork, lcwork, rwork, lrwork, info)
  CGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method for GE matrices
subroutine cgesvdx (jobu, jobvt, range, m, n, a, lda, vl, vu, il, iu, ns, s, u, ldu, vt, ldvt, work, lwork, rwork, iwork, info)
  CGESVDX computes the singular value decomposition (SVD) for GE matrices
subroutine cggsvd3 (jobu, jobv, jobq, m, n, p, k, l, a, lda, b, ldb, alpha, beta, u, ldu, v, ldv, q, ldq, work, lwork, rwork, iwork, info)
  CGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices

Detailed Description

This is the group of complex singular value driver functions for GE matrices

Function Documentation

◆ cgejsv()

subroutine cgejsv ( character*1 joba,
character*1 jobu,
character*1 jobv,
character*1 jobr,
character*1 jobt,
character*1 jobp,
integer m,
integer n,
complex, dimension( lda, * ) a,
integer lda,
real, dimension( n ) sva,
complex, dimension( ldu, * ) u,
integer ldu,
complex, dimension( ldv, * ) v,
integer ldv,
complex, dimension( lwork ) cwork,
integer lwork,
real, dimension( lrwork ) rwork,
integer lrwork,
integer, dimension( * ) iwork,
integer info )

CGEJSV

Download CGEJSV + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> CGEJSV computes the singular value decomposition (SVD) of a complex M-by-N
!> matrix [A], where M >= N. The SVD of [A] is written as
!>
!>              [A] = [U] * [SIGMA] * [V]^*,
!>
!> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
!> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and
!> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are
!> the singular values of [A]. The columns of [U] and [V] are the left and
!> the right singular vectors of [A], respectively. The matrices [U] and [V]
!> are computed and stored in the arrays U and V, respectively. The diagonal
!> of [SIGMA] is computed and stored in the array SVA.
!> 

Arguments:

Parameters
[in]JOBA
!>          JOBA is CHARACTER*1
!>         Specifies the level of accuracy:
!>       = 'C': This option works well (high relative accuracy) if A = B * D,
!>              with well-conditioned B and arbitrary diagonal matrix D.
!>              The accuracy cannot be spoiled by COLUMN scaling. The
!>              accuracy of the computed output depends on the condition of
!>              B, and the procedure aims at the best theoretical accuracy.
!>              The relative error max_{i=1:N}|d sigma_i| / sigma_i is
!>              bounded by f(M,N)*epsilon* cond(B), independent of D.
!>              The input matrix is preprocessed with the QRF with column
!>              pivoting. This initial preprocessing and preconditioning by
!>              a rank revealing QR factorization is common for all values of
!>              JOBA. Additional actions are specified as follows:
!>       = 'E': Computation as with 'C' with an additional estimate of the
!>              condition number of B. It provides a realistic error bound.
!>       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
!>              D1, D2, and well-conditioned matrix C, this option gives
!>              higher accuracy than the 'C' option. If the structure of the
!>              input matrix is not known, and relative accuracy is
!>              desirable, then this option is advisable. The input matrix A
!>              is preprocessed with QR factorization with FULL (row and
!>              column) pivoting.
!>       = 'G': Computation as with 'F' with an additional estimate of the
!>              condition number of B, where A=B*D. If A has heavily weighted
!>              rows, then using this condition number gives too pessimistic
!>              error bound.
!>       = 'A': Small singular values are not well determined by the data 
!>              and are considered as noisy; the matrix is treated as
!>              numerically rank deficient. The error in the computed
!>              singular values is bounded by f(m,n)*epsilon*||A||.
!>              The computed SVD A = U * S * V^* restores A up to
!>              f(m,n)*epsilon*||A||.
!>              This gives the procedure the licence to discard (set to zero)
!>              all singular values below N*epsilon*||A||.
!>       = 'R': Similar as in 'A'. Rank revealing property of the initial
!>              QR factorization is used do reveal (using triangular factor)
!>              a gap sigma_{r+1} < epsilon * sigma_r in which case the
!>              numerical RANK is declared to be r. The SVD is computed with
!>              absolute error bounds, but more accurately than with 'A'.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>         Specifies whether to compute the columns of U:
!>       = 'U': N columns of U are returned in the array U.
!>       = 'F': full set of M left sing. vectors is returned in the array U.
!>       = 'W': U may be used as workspace of length M*N. See the description
!>              of U.
!>       = 'N': U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>         Specifies whether to compute the matrix V:
!>       = 'V': N columns of V are returned in the array V; Jacobi rotations
!>              are not explicitly accumulated.
!>       = 'J': N columns of V are returned in the array V, but they are
!>              computed as the product of Jacobi rotations, if JOBT = 'N'.
!>       = 'W': V may be used as workspace of length N*N. See the description
!>              of V.
!>       = 'N': V is not computed.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>         Specifies the RANGE for the singular values. Issues the licence to
!>         set to zero small positive singular values if they are outside
!>         specified range. If A .NE. 0 is scaled so that the largest singular
!>         value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
!>         the licence to kill columns of A whose norm in c*A is less than
!>         SQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN,
!>         where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
!>       = 'N': Do not kill small columns of c*A. This option assumes that
!>              BLAS and QR factorizations and triangular solvers are
!>              implemented to work in that range. If the condition of A
!>              is greater than BIG, use CGESVJ.
!>       = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
!>              (roughly, as described above). This option is recommended.
!>                                             ===========================
!>         For computing the singular values in the FULL range [SFMIN,BIG]
!>         use CGESVJ.
!> 
[in]JOBT
!>          JOBT is CHARACTER*1
!>         If the matrix is square then the procedure may determine to use
!>         transposed A if A^* seems to be better with respect to convergence.
!>         If the matrix is not square, JOBT is ignored.
!>         The decision is based on two values of entropy over the adjoint
!>         orbit of A^* * A. See the descriptions of WORK(6) and WORK(7).
!>       = 'T': transpose if entropy test indicates possibly faster
!>         convergence of Jacobi process if A^* is taken as input. If A is
!>         replaced with A^*, then the row pivoting is included automatically.
!>       = 'N': do not speculate.
!>         The option 'T' can be used to compute only the singular values, or
!>         the full SVD (U, SIGMA and V). For only one set of singular vectors
!>         (U or V), the caller should provide both U and V, as one of the
!>         matrices is used as workspace if the matrix A is transposed.
!>         The implementer can easily remove this constraint and make the
!>         code more complicated. See the descriptions of U and V.
!>         In general, this option is considered experimental, and 'N'; should
!>         be preferred. This is subject to changes in the future.
!> 
[in]JOBP
!>          JOBP is CHARACTER*1
!>         Issues the licence to introduce structured perturbations to drown
!>         denormalized numbers. This licence should be active if the
!>         denormals are poorly implemented, causing slow computation,
!>         especially in cases of fast convergence (!). For details see [1,2].
!>         For the sake of simplicity, this perturbations are included only
!>         when the full SVD or only the singular values are requested. The
!>         implementer/user can easily add the perturbation for the cases of
!>         computing one set of singular vectors.
!>       = 'P': introduce perturbation
!>       = 'N': do not perturb
!> 
[in]M
!>          M is INTEGER
!>         The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>         The number of columns of the input matrix A. M >= N >= 0.
!> 
[in,out]A
!>          A is COMPLEX array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]SVA
!>          SVA is REAL array, dimension (N)
!>          On exit,
!>          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
!>            computation SVA contains Euclidean column norms of the
!>            iterated matrices in the array A.
!>          - For WORK(1) .NE. WORK(2): The singular values of A are
!>            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
!>            sigma_max(A) overflows or if small singular values have been
!>            saved from underflow by scaling the input matrix A.
!>          - If JOBR='R' then some of the singular values may be returned
!>            as exact zeros obtained by  because they are
!>            below the numerical rank threshold or are denormalized numbers.
!> 
[out]U
!>          U is COMPLEX array, dimension ( LDU, N ) or ( LDU, M )
!>          If JOBU = 'U', then U contains on exit the M-by-N matrix of
!>                         the left singular vectors.
!>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
!>                         the left singular vectors, including an ONB
!>                         of the orthogonal complement of the Range(A).
!>          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N),
!>                         then U is used as workspace if the procedure
!>                         replaces A with A^*. In that case, [V] is computed
!>                         in U as left singular vectors of A^* and then
!>                         copied back to the V array. This 'W' option is just
!>                         a reminder to the caller that in this case U is
!>                         reserved as workspace of length N*N.
!>          If JOBU = 'N'  U is not referenced, unless JOBT='T'.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U,  LDU >= 1.
!>          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
!> 
[out]V
!>          V is COMPLEX array, dimension ( LDV, N )
!>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
!>                         the right singular vectors;
!>          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N),
!>                         then V is used as workspace if the pprocedure
!>                         replaces A with A^*. In that case, [U] is computed
!>                         in V as right singular vectors of A^* and then
!>                         copied back to the U array. This 'W' option is just
!>                         a reminder to the caller that in this case V is
!>                         reserved as workspace of length N*N.
!>          If JOBV = 'N'  V is not referenced, unless JOBT='T'.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V,  LDV >= 1.
!>          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
!> 
[out]CWORK
!>          CWORK is COMPLEX array, dimension (MAX(2,LWORK))
!>          If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit CWORK(1) contains the required length of 
!>          CWORK for the job parameters used in the call.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          Length of CWORK to confirm proper allocation of workspace.
!>          LWORK depends on the job:
!>
!>          1. If only SIGMA is needed ( JOBU = 'N', JOBV = 'N' ) and
!>            1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'):
!>               LWORK >= 2*N+1. This is the minimal requirement.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= N + (N+1)*NB. Here NB is the optimal
!>               block size for CGEQP3 and CGEQRF.
!>               In general, optimal LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ)).        
!>            1.2. .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G'). In this case, LWORK the minimal
!>               requirement is LWORK >= N*N + 2*N.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ),
!>                            N*N+LWORK(CPOCON)).
!>          2. If SIGMA and the right singular vectors are needed (JOBV = 'V'),
!>             (JOBU = 'N')
!>            2.1   .. no scaled condition estimate requested (JOBE = 'N'):    
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance, 
!>               LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ,
!>               CUNMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3), N+LWORK(CGESVJ),
!>                       N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)).
!>            2.2 .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G').
!>            -> the minimal requirement is LWORK >= 3*N.      
!>            -> For optimal performance, 
!>               LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ,
!>               CUNMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3), LWORK(CPOCON), N+LWORK(CGESVJ),
!>                       N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)).   
!>          3. If SIGMA and the left singular vectors are needed
!>            3.1  .. no scaled condition estimate requested (JOBE = 'N'):
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3), 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)). 
!>            3.2  .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G').
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CPOCON),
!>                        2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)).
!>
!>          4. If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and
!>            4.1. if JOBV = 'V'
!>               the minimal requirement is LWORK >= 5*N+2*N*N.
!>            4.2. if JOBV = 'J' the minimal requirement is
!>               LWORK >= 4*N+N*N.
!>            In both cases, the allocated CWORK can accommodate blocked runs
!>            of CGEQP3, CGEQRF, CGELQF, CUNMQR, CUNMLQ.
!> 
!>          If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the
!>          minimal length of CWORK for the job parameters used in the call.        
!> 
[out]RWORK
!>          RWORK is REAL array, dimension (MAX(7,LWORK))
!>          On exit,
!>          RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1)
!>                    such that SCALE*SVA(1:N) are the computed singular values
!>                    of A. (See the description of SVA().)
!>          RWORK(2) = See the description of RWORK(1).
!>          RWORK(3) = SCONDA is an estimate for the condition number of
!>                    column equilibrated A. (If JOBA = 'E' or 'G')
!>                    SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
!>                    It is computed using CPOCON. It holds
!>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
!>                    where R is the triangular factor from the QRF of A.
!>                    However, if R is truncated and the numerical rank is
!>                    determined to be strictly smaller than N, SCONDA is
!>                    returned as -1, thus indicating that the smallest
!>                    singular values might be lost.
!>
!>          If full SVD is needed, the following two condition numbers are
!>          useful for the analysis of the algorithm. They are provided for
!>          a developer/implementer who is familiar with the details of
!>          the method.
!>
!>          RWORK(4) = an estimate of the scaled condition number of the
!>                    triangular factor in the first QR factorization.
!>          RWORK(5) = an estimate of the scaled condition number of the
!>                    triangular factor in the second QR factorization.
!>          The following two parameters are computed if JOBT = 'T'.
!>          They are provided for a developer/implementer who is familiar
!>          with the details of the method.
!>          RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy
!>                    of diag(A^* * A) / Trace(A^* * A) taken as point in the
!>                    probability simplex.
!>          RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).)
!>          If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit RWORK(1) contains the required length of
!>          RWORK for the job parameters used in the call.
!> 
[in]LRWORK
!>          LRWORK is INTEGER
!>          Length of RWORK to confirm proper allocation of workspace.
!>          LRWORK depends on the job:
!>
!>       1. If only the singular values are requested i.e. if
!>          LSAME(JOBU,'N') .AND. LSAME(JOBV,'N')
!>          then:
!>          1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>               then: LRWORK = max( 7, 2 * M ).
!>          1.2. Otherwise, LRWORK  = max( 7,  N ).
!>       2. If singular values with the right singular vectors are requested
!>          i.e. if
!>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND.
!>          .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F'))
!>          then:
!>          2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          2.2. Otherwise, LRWORK  = max( 7,  N ).
!>       3. If singular values with the left singular vectors are requested, i.e. if
!>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
!>          .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
!>          then:
!>          3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          3.2. Otherwise, LRWORK  = max( 7,  N ).
!>       4. If singular values with both the left and the right singular vectors
!>          are requested, i.e. if
!>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
!>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
!>          then:
!>          4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          4.2. Otherwise, LRWORK  = max( 7, N ).
!> 
!>          If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and 
!>          the length of RWORK is returned in RWORK(1). 
!> 
[out]IWORK
!>          IWORK is INTEGER array, of dimension at least 4, that further depends
!>          on the job:
!> 
!>          1. If only the singular values are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N.
!>          2. If the singular values and the right singular vectors are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>          3. If the singular values and the left singular vectors are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>          4. If the singular values with both the left and the right singular vectors
!>             are requested, then:      
!>             4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows:
!>                  If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>                  then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>             4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows:
!>                  If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>                  then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N.
!>        
!>          On exit,
!>          IWORK(1) = the numerical rank determined after the initial
!>                     QR factorization with pivoting. See the descriptions
!>                     of JOBA and JOBR.
!>          IWORK(2) = the number of the computed nonzero singular values
!>          IWORK(3) = if nonzero, a warning message:
!>                     If IWORK(3) = 1 then some of the column norms of A
!>                     were denormalized floats. The requested high accuracy
!>                     is not warranted by the data.
!>          IWORK(4) = 1 or -1. If IWORK(4) = 1, then the procedure used A^* to
!>                     do the job as specified by the JOB parameters.
!>          If the call to CGEJSV is a workspace query (indicated by LWORK = -1 and 
!>          LRWORK = -1), then on exit IWORK(1) contains the required length of 
!>          IWORK for the job parameters used in the call.
!> 
[out]INFO
!>          INFO is INTEGER
!>           < 0:  if INFO = -i, then the i-th argument had an illegal value.
!>           = 0:  successful exit;
!>           > 0:  CGEJSV  did not converge in the maximal allowed number
!>                 of sweeps. The computed values may be inaccurate.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>  CGEJSV implements a preconditioned Jacobi SVD algorithm. It uses CGEQP3,
!>  CGEQRF, and CGELQF as preprocessors and preconditioners. Optionally, an
!>  additional row pivoting can be used as a preprocessor, which in some
!>  cases results in much higher accuracy. An example is matrix A with the
!>  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
!>  diagonal matrices and C is well-conditioned matrix. In that case, complete
!>  pivoting in the first QR factorizations provides accuracy dependent on the
!>  condition number of C, and independent of D1, D2. Such higher accuracy is
!>  not completely understood theoretically, but it works well in practice.
!>  Further, if A can be written as A = B*D, with well-conditioned B and some
!>  diagonal D, then the high accuracy is guaranteed, both theoretically and
!>  in software, independent of D. For more details see [1], [2].
!>     The computational range for the singular values can be the full range
!>  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
!>  & LAPACK routines called by CGEJSV are implemented to work in that range.
!>  If that is not the case, then the restriction for safe computation with
!>  the singular values in the range of normalized IEEE numbers is that the
!>  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
!>  overflow. This code (CGEJSV) is best used in this restricted range,
!>  meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are
!>  returned as zeros. See JOBR for details on this.
!>     Further, this implementation is somewhat slower than the one described
!>  in [1,2] due to replacement of some non-LAPACK components, and because
!>  the choice of some tuning parameters in the iterative part (CGESVJ) is
!>  left to the implementer on a particular machine.
!>     The rank revealing QR factorization (in this code: CGEQP3) should be
!>  implemented as in [3]. We have a new version of CGEQP3 under development
!>  that is more robust than the current one in LAPACK, with a cleaner cut in
!>  rank deficient cases. It will be available in the SIGMA library [4].
!>  If M is much larger than N, it is obvious that the initial QRF with
!>  column pivoting can be preprocessed by the QRF without pivoting. That
!>  well known trick is not used in CGEJSV because in some cases heavy row
!>  weighting can be treated with complete pivoting. The overhead in cases
!>  M much larger than N is then only due to pivoting, but the benefits in
!>  terms of accuracy have prevailed. The implementer/user can incorporate
!>  this extra QRF step easily. The implementer can also improve data movement
!>  (matrix transpose, matrix copy, matrix transposed copy) - this
!>  implementation of CGEJSV uses only the simplest, naive data movement.
!> 
Contributor:
Zlatko Drmac (Zagreb, Croatia)
References:
!>
!> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
!>     LAPACK Working note 169.
!> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
!>     LAPACK Working note 170.
!> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
!>     factorization software - a case study.
!>     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
!>     LAPACK Working note 176.
!> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
!>     QSVD, (H,K)-SVD computations.
!>     Department of Mathematics, University of Zagreb, 2008, 2016.
!> 
Bugs, examples and comments:
Please report all bugs and send interesting examples and/or comments to drmac.nosp@m.@mat.nosp@m.h.hr. Thank you.

Definition at line 565 of file cgejsv.f.

568*
569* -- LAPACK computational routine --
570* -- LAPACK is a software package provided by Univ. of Tennessee, --
571* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
572*
573* .. Scalar Arguments ..
574 IMPLICIT NONE
575 INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N
576* ..
577* .. Array Arguments ..
578 COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK )
579 REAL SVA( N ), RWORK( LRWORK )
580 INTEGER IWORK( * )
581 CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
582* ..
583*
584* ===========================================================================
585*
586* .. Local Parameters ..
587 REAL ZERO, ONE
588 parameter( zero = 0.0e0, one = 1.0e0 )
589 COMPLEX CZERO, CONE
590 parameter( czero = ( 0.0e0, 0.0e0 ), cone = ( 1.0e0, 0.0e0 ) )
591* ..
592* .. Local Scalars ..
593 COMPLEX CTEMP
594 REAL AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK,
595 $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM,
596 $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC
597 INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING
598 LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY,
599 $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL,
600 $ ROWPIV, RSVEC, TRANSP
601*
602 INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK
603 INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM,
604 $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF
605 INTEGER LWRK_CGELQF, LWRK_CGEQP3, LWRK_CGEQP3N, LWRK_CGEQRF,
606 $ LWRK_CGESVJ, LWRK_CGESVJV, LWRK_CGESVJU, LWRK_CUNMLQ,
607 $ LWRK_CUNMQR, LWRK_CUNMQRM
608* ..
609* .. Local Arrays
610 COMPLEX CDUMMY(1)
611 REAL RDUMMY(1)
612*
613* .. Intrinsic Functions ..
614 INTRINSIC abs, cmplx, conjg, alog, max, min, real, nint, sqrt
615* ..
616* .. External Functions ..
617 REAL SLAMCH, SCNRM2
618 INTEGER ISAMAX, ICAMAX
619 LOGICAL LSAME
620 EXTERNAL isamax, icamax, lsame, slamch, scnrm2
621* ..
622* .. External Subroutines ..
626 $ xerbla
627*
628 EXTERNAL cgesvj
629* ..
630*
631* Test the input arguments
632*
633 lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
634 jracc = lsame( jobv, 'J' )
635 rsvec = lsame( jobv, 'V' ) .OR. jracc
636 rowpiv = lsame( joba, 'F' ) .OR. lsame( joba, 'G' )
637 l2rank = lsame( joba, 'R' )
638 l2aber = lsame( joba, 'A' )
639 errest = lsame( joba, 'E' ) .OR. lsame( joba, 'G' )
640 l2tran = lsame( jobt, 'T' ) .AND. ( m .EQ. n )
641 l2kill = lsame( jobr, 'R' )
642 defr = lsame( jobr, 'N' )
643 l2pert = lsame( jobp, 'P' )
644*
645 lquery = ( lwork .EQ. -1 ) .OR. ( lrwork .EQ. -1 )
646*
647 IF ( .NOT.(rowpiv .OR. l2rank .OR. l2aber .OR.
648 $ errest .OR. lsame( joba, 'C' ) )) THEN
649 info = - 1
650 ELSE IF ( .NOT.( lsvec .OR. lsame( jobu, 'N' ) .OR.
651 $ ( lsame( jobu, 'W' ) .AND. rsvec .AND. l2tran ) ) ) THEN
652 info = - 2
653 ELSE IF ( .NOT.( rsvec .OR. lsame( jobv, 'N' ) .OR.
654 $ ( lsame( jobv, 'W' ) .AND. lsvec .AND. l2tran ) ) ) THEN
655 info = - 3
656 ELSE IF ( .NOT. ( l2kill .OR. defr ) ) THEN
657 info = - 4
658 ELSE IF ( .NOT. ( lsame(jobt,'T') .OR. lsame(jobt,'N') ) ) THEN
659 info = - 5
660 ELSE IF ( .NOT. ( l2pert .OR. lsame( jobp, 'N' ) ) ) THEN
661 info = - 6
662 ELSE IF ( m .LT. 0 ) THEN
663 info = - 7
664 ELSE IF ( ( n .LT. 0 ) .OR. ( n .GT. m ) ) THEN
665 info = - 8
666 ELSE IF ( lda .LT. m ) THEN
667 info = - 10
668 ELSE IF ( lsvec .AND. ( ldu .LT. m ) ) THEN
669 info = - 13
670 ELSE IF ( rsvec .AND. ( ldv .LT. n ) ) THEN
671 info = - 15
672 ELSE
673* #:)
674 info = 0
675 END IF
676*
677 IF ( info .EQ. 0 ) THEN
678* .. compute the minimal and the optimal workspace lengths
679* [[The expressions for computing the minimal and the optimal
680* values of LCWORK, LRWORK are written with a lot of redundancy and
681* can be simplified. However, this verbose form is useful for
682* maintenance and modifications of the code.]]
683*
684* .. minimal workspace length for CGEQP3 of an M x N matrix,
685* CGEQRF of an N x N matrix, CGELQF of an N x N matrix,
686* CUNMLQ for computing N x N matrix, CUNMQR for computing N x N
687* matrix, CUNMQR for computing M x N matrix, respectively.
688 lwqp3 = n+1
689 lwqrf = max( 1, n )
690 lwlqf = max( 1, n )
691 lwunmlq = max( 1, n )
692 lwunmqr = max( 1, n )
693 lwunmqrm = max( 1, m )
694* .. minimal workspace length for CPOCON of an N x N matrix
695 lwcon = 2 * n
696* .. minimal workspace length for CGESVJ of an N x N matrix,
697* without and with explicit accumulation of Jacobi rotations
698 lwsvdj = max( 2 * n, 1 )
699 lwsvdjv = max( 2 * n, 1 )
700* .. minimal REAL workspace length for CGEQP3, CPOCON, CGESVJ
701 lrwqp3 = 2 * n
702 lrwcon = n
703 lrwsvdj = n
704 IF ( lquery ) THEN
705 CALL cgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,
706 $ rdummy, ierr )
707 lwrk_cgeqp3 = real( cdummy(1) )
708 CALL cgeqrf( n, n, a, lda, cdummy, cdummy,-1, ierr )
709 lwrk_cgeqrf = real( cdummy(1) )
710 CALL cgelqf( n, n, a, lda, cdummy, cdummy,-1, ierr )
711 lwrk_cgelqf = real( cdummy(1) )
712 END IF
713 minwrk = 2
714 optwrk = 2
715 miniwrk = n
716 IF ( .NOT. (lsvec .OR. rsvec ) ) THEN
717* .. minimal and optimal sizes of the complex workspace if
718* only the singular values are requested
719 IF ( errest ) THEN
720 minwrk = max( n+lwqp3, n**2+lwcon, n+lwqrf, lwsvdj )
721 ELSE
722 minwrk = max( n+lwqp3, n+lwqrf, lwsvdj )
723 END IF
724 IF ( lquery ) THEN
725 CALL cgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n, v,
726 $ ldv, cdummy, -1, rdummy, -1, ierr )
727 lwrk_cgesvj = real( cdummy(1) )
728 IF ( errest ) THEN
729 optwrk = max( n+lwrk_cgeqp3, n**2+lwcon,
730 $ n+lwrk_cgeqrf, lwrk_cgesvj )
731 ELSE
732 optwrk = max( n+lwrk_cgeqp3, n+lwrk_cgeqrf,
733 $ lwrk_cgesvj )
734 END IF
735 END IF
736 IF ( l2tran .OR. rowpiv ) THEN
737 IF ( errest ) THEN
738 minrwrk = max( 7, 2*m, lrwqp3, lrwcon, lrwsvdj )
739 ELSE
740 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
741 END IF
742 ELSE
743 IF ( errest ) THEN
744 minrwrk = max( 7, lrwqp3, lrwcon, lrwsvdj )
745 ELSE
746 minrwrk = max( 7, lrwqp3, lrwsvdj )
747 END IF
748 END IF
749 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
750 ELSE IF ( rsvec .AND. (.NOT.lsvec) ) THEN
751* .. minimal and optimal sizes of the complex workspace if the
752* singular values and the right singular vectors are requested
753 IF ( errest ) THEN
754 minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,
755 $ 2*n+lwqrf, n+lwsvdj, n+lwunmlq )
756 ELSE
757 minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2*n+lwqrf,
758 $ n+lwsvdj, n+lwunmlq )
759 END IF
760 IF ( lquery ) THEN
761 CALL cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
762 $ lda, cdummy, -1, rdummy, -1, ierr )
763 lwrk_cgesvj = real( cdummy(1) )
764 CALL cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
765 $ v, ldv, cdummy, -1, ierr )
766 lwrk_cunmlq = real( cdummy(1) )
767 IF ( errest ) THEN
768 optwrk = max( n+lwrk_cgeqp3, lwcon, lwrk_cgesvj,
769 $ n+lwrk_cgelqf, 2*n+lwrk_cgeqrf,
770 $ n+lwrk_cgesvj, n+lwrk_cunmlq )
771 ELSE
772 optwrk = max( n+lwrk_cgeqp3, lwrk_cgesvj,n+lwrk_cgelqf,
773 $ 2*n+lwrk_cgeqrf, n+lwrk_cgesvj,
774 $ n+lwrk_cunmlq )
775 END IF
776 END IF
777 IF ( l2tran .OR. rowpiv ) THEN
778 IF ( errest ) THEN
779 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
780 ELSE
781 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
782 END IF
783 ELSE
784 IF ( errest ) THEN
785 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
786 ELSE
787 minrwrk = max( 7, lrwqp3, lrwsvdj )
788 END IF
789 END IF
790 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
791 ELSE IF ( lsvec .AND. (.NOT.rsvec) ) THEN
792* .. minimal and optimal sizes of the complex workspace if the
793* singular values and the left singular vectors are requested
794 IF ( errest ) THEN
795 minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm )
796 ELSE
797 minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm )
798 END IF
799 IF ( lquery ) THEN
800 CALL cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
801 $ lda, cdummy, -1, rdummy, -1, ierr )
802 lwrk_cgesvj = real( cdummy(1) )
803 CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
804 $ ldu, cdummy, -1, ierr )
805 lwrk_cunmqrm = real( cdummy(1) )
806 IF ( errest ) THEN
807 optwrk = n + max( lwrk_cgeqp3, lwcon, n+lwrk_cgeqrf,
808 $ lwrk_cgesvj, lwrk_cunmqrm )
809 ELSE
810 optwrk = n + max( lwrk_cgeqp3, n+lwrk_cgeqrf,
811 $ lwrk_cgesvj, lwrk_cunmqrm )
812 END IF
813 END IF
814 IF ( l2tran .OR. rowpiv ) THEN
815 IF ( errest ) THEN
816 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
817 ELSE
818 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
819 END IF
820 ELSE
821 IF ( errest ) THEN
822 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
823 ELSE
824 minrwrk = max( 7, lrwqp3, lrwsvdj )
825 END IF
826 END IF
827 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
828 ELSE
829* .. minimal and optimal sizes of the complex workspace if the
830* full SVD is requested
831 IF ( .NOT. jracc ) THEN
832 IF ( errest ) THEN
833 minwrk = max( n+lwqp3, n+lwcon, 2*n+n**2+lwcon,
834 $ 2*n+lwqrf, 2*n+lwqp3,
835 $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
836 $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
837 $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
838 $ n+n**2+lwsvdj, n+lwunmqrm )
839 ELSE
840 minwrk = max( n+lwqp3, 2*n+n**2+lwcon,
841 $ 2*n+lwqrf, 2*n+lwqp3,
842 $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
843 $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
844 $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
845 $ n+n**2+lwsvdj, n+lwunmqrm )
846 END IF
847 miniwrk = miniwrk + n
848 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
849 ELSE
850 IF ( errest ) THEN
851 minwrk = max( n+lwqp3, n+lwcon, 2*n+lwqrf,
852 $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
853 $ n+lwunmqrm )
854 ELSE
855 minwrk = max( n+lwqp3, 2*n+lwqrf,
856 $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
857 $ n+lwunmqrm )
858 END IF
859 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
860 END IF
861 IF ( lquery ) THEN
862 CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
863 $ ldu, cdummy, -1, ierr )
864 lwrk_cunmqrm = real( cdummy(1) )
865 CALL cunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,
866 $ ldu, cdummy, -1, ierr )
867 lwrk_cunmqr = real( cdummy(1) )
868 IF ( .NOT. jracc ) THEN
869 CALL cgeqp3( n,n, a, lda, iwork, cdummy,cdummy, -1,
870 $ rdummy, ierr )
871 lwrk_cgeqp3n = real( cdummy(1) )
872 CALL cgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,
873 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
874 lwrk_cgesvj = real( cdummy(1) )
875 CALL cgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,
876 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
877 lwrk_cgesvju = real( cdummy(1) )
878 CALL cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
879 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
880 lwrk_cgesvjv = real( cdummy(1) )
881 CALL cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
882 $ v, ldv, cdummy, -1, ierr )
883 lwrk_cunmlq = real( cdummy(1) )
884 IF ( errest ) THEN
885 optwrk = max( n+lwrk_cgeqp3, n+lwcon,
886 $ 2*n+n**2+lwcon, 2*n+lwrk_cgeqrf,
887 $ 2*n+lwrk_cgeqp3n,
888 $ 2*n+n**2+n+lwrk_cgelqf,
889 $ 2*n+n**2+n+n**2+lwcon,
890 $ 2*n+n**2+n+lwrk_cgesvj,
891 $ 2*n+n**2+n+lwrk_cgesvjv,
892 $ 2*n+n**2+n+lwrk_cunmqr,
893 $ 2*n+n**2+n+lwrk_cunmlq,
894 $ n+n**2+lwrk_cgesvju,
895 $ n+lwrk_cunmqrm )
896 ELSE
897 optwrk = max( n+lwrk_cgeqp3,
898 $ 2*n+n**2+lwcon, 2*n+lwrk_cgeqrf,
899 $ 2*n+lwrk_cgeqp3n,
900 $ 2*n+n**2+n+lwrk_cgelqf,
901 $ 2*n+n**2+n+n**2+lwcon,
902 $ 2*n+n**2+n+lwrk_cgesvj,
903 $ 2*n+n**2+n+lwrk_cgesvjv,
904 $ 2*n+n**2+n+lwrk_cunmqr,
905 $ 2*n+n**2+n+lwrk_cunmlq,
906 $ n+n**2+lwrk_cgesvju,
907 $ n+lwrk_cunmqrm )
908 END IF
909 ELSE
910 CALL cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
911 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
912 lwrk_cgesvjv = real( cdummy(1) )
913 CALL cunmqr( 'L', 'N', n, n, n, cdummy, n, cdummy,
914 $ v, ldv, cdummy, -1, ierr )
915 lwrk_cunmqr = real( cdummy(1) )
916 CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
917 $ ldu, cdummy, -1, ierr )
918 lwrk_cunmqrm = real( cdummy(1) )
919 IF ( errest ) THEN
920 optwrk = max( n+lwrk_cgeqp3, n+lwcon,
921 $ 2*n+lwrk_cgeqrf, 2*n+n**2,
922 $ 2*n+n**2+lwrk_cgesvjv,
923 $ 2*n+n**2+n+lwrk_cunmqr,n+lwrk_cunmqrm )
924 ELSE
925 optwrk = max( n+lwrk_cgeqp3, 2*n+lwrk_cgeqrf,
926 $ 2*n+n**2, 2*n+n**2+lwrk_cgesvjv,
927 $ 2*n+n**2+n+lwrk_cunmqr,
928 $ n+lwrk_cunmqrm )
929 END IF
930 END IF
931 END IF
932 IF ( l2tran .OR. rowpiv ) THEN
933 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
934 ELSE
935 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
936 END IF
937 END IF
938 minwrk = max( 2, minwrk )
939 optwrk = max( optwrk, minwrk )
940 IF ( lwork .LT. minwrk .AND. (.NOT.lquery) ) info = - 17
941 IF ( lrwork .LT. minrwrk .AND. (.NOT.lquery) ) info = - 19
942 END IF
943*
944 IF ( info .NE. 0 ) THEN
945* #:(
946 CALL xerbla( 'CGEJSV', - info )
947 RETURN
948 ELSE IF ( lquery ) THEN
949 cwork(1) = optwrk
950 cwork(2) = minwrk
951 rwork(1) = minrwrk
952 iwork(1) = max( 4, miniwrk )
953 RETURN
954 END IF
955*
956* Quick return for void matrix (Y3K safe)
957* #:)
958 IF ( ( m .EQ. 0 ) .OR. ( n .EQ. 0 ) ) THEN
959 iwork(1:4) = 0
960 rwork(1:7) = 0
961 RETURN
962 ENDIF
963*
964* Determine whether the matrix U should be M x N or M x M
965*
966 IF ( lsvec ) THEN
967 n1 = n
968 IF ( lsame( jobu, 'F' ) ) n1 = m
969 END IF
970*
971* Set numerical parameters
972*
973*! NOTE: Make sure SLAMCH() does not fail on the target architecture.
974*
975 epsln = slamch('Epsilon')
976 sfmin = slamch('SafeMinimum')
977 small = sfmin / epsln
978 big = slamch('O')
979* BIG = ONE / SFMIN
980*
981* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
982*
983*(!) If necessary, scale SVA() to protect the largest norm from
984* overflow. It is possible that this scaling pushes the smallest
985* column norm left from the underflow threshold (extreme case).
986*
987 scalem = one / sqrt(real(m)*real(n))
988 noscal = .true.
989 goscal = .true.
990 DO 1874 p = 1, n
991 aapp = zero
992 aaqq = one
993 CALL classq( m, a(1,p), 1, aapp, aaqq )
994 IF ( aapp .GT. big ) THEN
995 info = - 9
996 CALL xerbla( 'CGEJSV', -info )
997 RETURN
998 END IF
999 aaqq = sqrt(aaqq)
1000 IF ( ( aapp .LT. (big / aaqq) ) .AND. noscal ) THEN
1001 sva(p) = aapp * aaqq
1002 ELSE
1003 noscal = .false.
1004 sva(p) = aapp * ( aaqq * scalem )
1005 IF ( goscal ) THEN
1006 goscal = .false.
1007 CALL sscal( p-1, scalem, sva, 1 )
1008 END IF
1009 END IF
1010 1874 CONTINUE
1011*
1012 IF ( noscal ) scalem = one
1013*
1014 aapp = zero
1015 aaqq = big
1016 DO 4781 p = 1, n
1017 aapp = max( aapp, sva(p) )
1018 IF ( sva(p) .NE. zero ) aaqq = min( aaqq, sva(p) )
1019 4781 CONTINUE
1020*
1021* Quick return for zero M x N matrix
1022* #:)
1023 IF ( aapp .EQ. zero ) THEN
1024 IF ( lsvec ) CALL claset( 'G', m, n1, czero, cone, u, ldu )
1025 IF ( rsvec ) CALL claset( 'G', n, n, czero, cone, v, ldv )
1026 rwork(1) = one
1027 rwork(2) = one
1028 IF ( errest ) rwork(3) = one
1029 IF ( lsvec .AND. rsvec ) THEN
1030 rwork(4) = one
1031 rwork(5) = one
1032 END IF
1033 IF ( l2tran ) THEN
1034 rwork(6) = zero
1035 rwork(7) = zero
1036 END IF
1037 iwork(1) = 0
1038 iwork(2) = 0
1039 iwork(3) = 0
1040 iwork(4) = -1
1041 RETURN
1042 END IF
1043*
1044* Issue warning if denormalized column norms detected. Override the
1045* high relative accuracy request. Issue licence to kill nonzero columns
1046* (set them to zero) whose norm is less than sigma_max / BIG (roughly).
1047* #:(
1048 warning = 0
1049 IF ( aaqq .LE. sfmin ) THEN
1050 l2rank = .true.
1051 l2kill = .true.
1052 warning = 1
1053 END IF
1054*
1055* Quick return for one-column matrix
1056* #:)
1057 IF ( n .EQ. 1 ) THEN
1058*
1059 IF ( lsvec ) THEN
1060 CALL clascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr )
1061 CALL clacpy( 'A', m, 1, a, lda, u, ldu )
1062* computing all M left singular vectors of the M x 1 matrix
1063 IF ( n1 .NE. n ) THEN
1064 CALL cgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,ierr )
1065 CALL cungqr( m,n1,1, u,ldu,cwork,cwork(n+1),lwork-n,ierr )
1066 CALL ccopy( m, a(1,1), 1, u(1,1), 1 )
1067 END IF
1068 END IF
1069 IF ( rsvec ) THEN
1070 v(1,1) = cone
1071 END IF
1072 IF ( sva(1) .LT. (big*scalem) ) THEN
1073 sva(1) = sva(1) / scalem
1074 scalem = one
1075 END IF
1076 rwork(1) = one / scalem
1077 rwork(2) = one
1078 IF ( sva(1) .NE. zero ) THEN
1079 iwork(1) = 1
1080 IF ( ( sva(1) / scalem) .GE. sfmin ) THEN
1081 iwork(2) = 1
1082 ELSE
1083 iwork(2) = 0
1084 END IF
1085 ELSE
1086 iwork(1) = 0
1087 iwork(2) = 0
1088 END IF
1089 iwork(3) = 0
1090 iwork(4) = -1
1091 IF ( errest ) rwork(3) = one
1092 IF ( lsvec .AND. rsvec ) THEN
1093 rwork(4) = one
1094 rwork(5) = one
1095 END IF
1096 IF ( l2tran ) THEN
1097 rwork(6) = zero
1098 rwork(7) = zero
1099 END IF
1100 RETURN
1101*
1102 END IF
1103*
1104 transp = .false.
1105*
1106 aatmax = -one
1107 aatmin = big
1108 IF ( rowpiv .OR. l2tran ) THEN
1109*
1110* Compute the row norms, needed to determine row pivoting sequence
1111* (in the case of heavily row weighted A, row pivoting is strongly
1112* advised) and to collect information needed to compare the
1113* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.).
1114*
1115 IF ( l2tran ) THEN
1116 DO 1950 p = 1, m
1117 xsc = zero
1118 temp1 = one
1119 CALL classq( n, a(p,1), lda, xsc, temp1 )
1120* CLASSQ gets both the ell_2 and the ell_infinity norm
1121* in one pass through the vector
1122 rwork(m+p) = xsc * scalem
1123 rwork(p) = xsc * (scalem*sqrt(temp1))
1124 aatmax = max( aatmax, rwork(p) )
1125 IF (rwork(p) .NE. zero)
1126 $ aatmin = min(aatmin,rwork(p))
1127 1950 CONTINUE
1128 ELSE
1129 DO 1904 p = 1, m
1130 rwork(m+p) = scalem*abs( a(p,icamax(n,a(p,1),lda)) )
1131 aatmax = max( aatmax, rwork(m+p) )
1132 aatmin = min( aatmin, rwork(m+p) )
1133 1904 CONTINUE
1134 END IF
1135*
1136 END IF
1137*
1138* For square matrix A try to determine whether A^* would be better
1139* input for the preconditioned Jacobi SVD, with faster convergence.
1140* The decision is based on an O(N) function of the vector of column
1141* and row norms of A, based on the Shannon entropy. This should give
1142* the right choice in most cases when the difference actually matters.
1143* It may fail and pick the slower converging side.
1144*
1145 entra = zero
1146 entrat = zero
1147 IF ( l2tran ) THEN
1148*
1149 xsc = zero
1150 temp1 = one
1151 CALL slassq( n, sva, 1, xsc, temp1 )
1152 temp1 = one / temp1
1153*
1154 entra = zero
1155 DO 1113 p = 1, n
1156 big1 = ( ( sva(p) / xsc )**2 ) * temp1
1157 IF ( big1 .NE. zero ) entra = entra + big1 * alog(big1)
1158 1113 CONTINUE
1159 entra = - entra / alog(real(n))
1160*
1161* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex.
1162* It is derived from the diagonal of A^* * A. Do the same with the
1163* diagonal of A * A^*, compute the entropy of the corresponding
1164* probability distribution. Note that A * A^* and A^* * A have the
1165* same trace.
1166*
1167 entrat = zero
1168 DO 1114 p = 1, m
1169 big1 = ( ( rwork(p) / xsc )**2 ) * temp1
1170 IF ( big1 .NE. zero ) entrat = entrat + big1 * alog(big1)
1171 1114 CONTINUE
1172 entrat = - entrat / alog(real(m))
1173*
1174* Analyze the entropies and decide A or A^*. Smaller entropy
1175* usually means better input for the algorithm.
1176*
1177 transp = ( entrat .LT. entra )
1178*
1179* If A^* is better than A, take the adjoint of A. This is allowed
1180* only for square matrices, M=N.
1181 IF ( transp ) THEN
1182* In an optimal implementation, this trivial transpose
1183* should be replaced with faster transpose.
1184 DO 1115 p = 1, n - 1
1185 a(p,p) = conjg(a(p,p))
1186 DO 1116 q = p + 1, n
1187 ctemp = conjg(a(q,p))
1188 a(q,p) = conjg(a(p,q))
1189 a(p,q) = ctemp
1190 1116 CONTINUE
1191 1115 CONTINUE
1192 a(n,n) = conjg(a(n,n))
1193 DO 1117 p = 1, n
1194 rwork(m+p) = sva(p)
1195 sva(p) = rwork(p)
1196* previously computed row 2-norms are now column 2-norms
1197* of the transposed matrix
1198 1117 CONTINUE
1199 temp1 = aapp
1200 aapp = aatmax
1201 aatmax = temp1
1202 temp1 = aaqq
1203 aaqq = aatmin
1204 aatmin = temp1
1205 kill = lsvec
1206 lsvec = rsvec
1207 rsvec = kill
1208 IF ( lsvec ) n1 = n
1209*
1210 rowpiv = .true.
1211 END IF
1212*
1213 END IF
1214* END IF L2TRAN
1215*
1216* Scale the matrix so that its maximal singular value remains less
1217* than SQRT(BIG) -- the matrix is scaled so that its maximal column
1218* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
1219* SQRT(BIG) instead of BIG is the fact that CGEJSV uses LAPACK and
1220* BLAS routines that, in some implementations, are not capable of
1221* working in the full interval [SFMIN,BIG] and that they may provoke
1222* overflows in the intermediate results. If the singular values spread
1223* from SFMIN to BIG, then CGESVJ will compute them. So, in that case,
1224* one should use CGESVJ instead of CGEJSV.
1225 big1 = sqrt( big )
1226 temp1 = sqrt( big / real(n) )
1227* >> for future updates: allow bigger range, i.e. the largest column
1228* will be allowed up to BIG/N and CGESVJ will do the rest. However, for
1229* this all other (LAPACK) components must allow such a range.
1230* TEMP1 = BIG/REAL(N)
1231* TEMP1 = BIG * EPSLN this should 'almost' work with current LAPACK components
1232 CALL slascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr )
1233 IF ( aaqq .GT. (aapp * sfmin) ) THEN
1234 aaqq = ( aaqq / aapp ) * temp1
1235 ELSE
1236 aaqq = ( aaqq * temp1 ) / aapp
1237 END IF
1238 temp1 = temp1 * scalem
1239 CALL clascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr )
1240*
1241* To undo scaling at the end of this procedure, multiply the
1242* computed singular values with USCAL2 / USCAL1.
1243*
1244 uscal1 = temp1
1245 uscal2 = aapp
1246*
1247 IF ( l2kill ) THEN
1248* L2KILL enforces computation of nonzero singular values in
1249* the restricted range of condition number of the initial A,
1250* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
1251 xsc = sqrt( sfmin )
1252 ELSE
1253 xsc = small
1254*
1255* Now, if the condition number of A is too big,
1256* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
1257* as a precaution measure, the full SVD is computed using CGESVJ
1258* with accumulated Jacobi rotations. This provides numerically
1259* more robust computation, at the cost of slightly increased run
1260* time. Depending on the concrete implementation of BLAS and LAPACK
1261* (i.e. how they behave in presence of extreme ill-conditioning) the
1262* implementor may decide to remove this switch.
1263 IF ( ( aaqq.LT.sqrt(sfmin) ) .AND. lsvec .AND. rsvec ) THEN
1264 jracc = .true.
1265 END IF
1266*
1267 END IF
1268 IF ( aaqq .LT. xsc ) THEN
1269 DO 700 p = 1, n
1270 IF ( sva(p) .LT. xsc ) THEN
1271 CALL claset( 'A', m, 1, czero, czero, a(1,p), lda )
1272 sva(p) = zero
1273 END IF
1274 700 CONTINUE
1275 END IF
1276*
1277* Preconditioning using QR factorization with pivoting
1278*
1279 IF ( rowpiv ) THEN
1280* Optional row permutation (Bjoerck row pivoting):
1281* A result by Cox and Higham shows that the Bjoerck's
1282* row pivoting combined with standard column pivoting
1283* has similar effect as Powell-Reid complete pivoting.
1284* The ell-infinity norms of A are made nonincreasing.
1285 IF ( ( lsvec .AND. rsvec ) .AND. .NOT.( jracc ) ) THEN
1286 iwoff = 2*n
1287 ELSE
1288 iwoff = n
1289 END IF
1290 DO 1952 p = 1, m - 1
1291 q = isamax( m-p+1, rwork(m+p), 1 ) + p - 1
1292 iwork(iwoff+p) = q
1293 IF ( p .NE. q ) THEN
1294 temp1 = rwork(m+p)
1295 rwork(m+p) = rwork(m+q)
1296 rwork(m+q) = temp1
1297 END IF
1298 1952 CONTINUE
1299 CALL claswp( n, a, lda, 1, m-1, iwork(iwoff+1), 1 )
1300 END IF
1301*
1302* End of the preparation phase (scaling, optional sorting and
1303* transposing, optional flushing of small columns).
1304*
1305* Preconditioning
1306*
1307* If the full SVD is needed, the right singular vectors are computed
1308* from a matrix equation, and for that we need theoretical analysis
1309* of the Businger-Golub pivoting. So we use CGEQP3 as the first RR QRF.
1310* In all other cases the first RR QRF can be chosen by other criteria
1311* (eg speed by replacing global with restricted window pivoting, such
1312* as in xGEQPX from TOMS # 782). Good results will be obtained using
1313* xGEQPX with properly (!) chosen numerical parameters.
1314* Any improvement of CGEQP3 improves overall performance of CGEJSV.
1315*
1316* A * P1 = Q1 * [ R1^* 0]^*:
1317 DO 1963 p = 1, n
1318* .. all columns are free columns
1319 iwork(p) = 0
1320 1963 CONTINUE
1321 CALL cgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lwork-n,
1322 $ rwork, ierr )
1323*
1324* The upper triangular matrix R1 from the first QRF is inspected for
1325* rank deficiency and possibilities for deflation, or possible
1326* ill-conditioning. Depending on the user specified flag L2RANK,
1327* the procedure explores possibilities to reduce the numerical
1328* rank by inspecting the computed upper triangular factor. If
1329* L2RANK or L2ABER are up, then CGEJSV will compute the SVD of
1330* A + dA, where ||dA|| <= f(M,N)*EPSLN.
1331*
1332 nr = 1
1333 IF ( l2aber ) THEN
1334* Standard absolute error bound suffices. All sigma_i with
1335* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
1336* aggressive enforcement of lower numerical rank by introducing a
1337* backward error of the order of N*EPSLN*||A||.
1338 temp1 = sqrt(real(n))*epsln
1339 DO 3001 p = 2, n
1340 IF ( abs(a(p,p)) .GE. (temp1*abs(a(1,1))) ) THEN
1341 nr = nr + 1
1342 ELSE
1343 GO TO 3002
1344 END IF
1345 3001 CONTINUE
1346 3002 CONTINUE
1347 ELSE IF ( l2rank ) THEN
1348* .. similarly as above, only slightly more gentle (less aggressive).
1349* Sudden drop on the diagonal of R1 is used as the criterion for
1350* close-to-rank-deficient.
1351 temp1 = sqrt(sfmin)
1352 DO 3401 p = 2, n
1353 IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
1354 $ ( abs(a(p,p)) .LT. small ) .OR.
1355 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3402
1356 nr = nr + 1
1357 3401 CONTINUE
1358 3402 CONTINUE
1359*
1360 ELSE
1361* The goal is high relative accuracy. However, if the matrix
1362* has high scaled condition number the relative accuracy is in
1363* general not feasible. Later on, a condition number estimator
1364* will be deployed to estimate the scaled condition number.
1365* Here we just remove the underflowed part of the triangular
1366* factor. This prevents the situation in which the code is
1367* working hard to get the accuracy not warranted by the data.
1368 temp1 = sqrt(sfmin)
1369 DO 3301 p = 2, n
1370 IF ( ( abs(a(p,p)) .LT. small ) .OR.
1371 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3302
1372 nr = nr + 1
1373 3301 CONTINUE
1374 3302 CONTINUE
1375*
1376 END IF
1377*
1378 almort = .false.
1379 IF ( nr .EQ. n ) THEN
1380 maxprj = one
1381 DO 3051 p = 2, n
1382 temp1 = abs(a(p,p)) / sva(iwork(p))
1383 maxprj = min( maxprj, temp1 )
1384 3051 CONTINUE
1385 IF ( maxprj**2 .GE. one - real(n)*epsln ) almort = .true.
1386 END IF
1387*
1388*
1389 sconda = - one
1390 condr1 = - one
1391 condr2 = - one
1392*
1393 IF ( errest ) THEN
1394 IF ( n .EQ. nr ) THEN
1395 IF ( rsvec ) THEN
1396* .. V is available as workspace
1397 CALL clacpy( 'U', n, n, a, lda, v, ldv )
1398 DO 3053 p = 1, n
1399 temp1 = sva(iwork(p))
1400 CALL csscal( p, one/temp1, v(1,p), 1 )
1401 3053 CONTINUE
1402 IF ( lsvec )THEN
1403 CALL cpocon( 'U', n, v, ldv, one, temp1,
1404 $ cwork(n+1), rwork, ierr )
1405 ELSE
1406 CALL cpocon( 'U', n, v, ldv, one, temp1,
1407 $ cwork, rwork, ierr )
1408 END IF
1409*
1410 ELSE IF ( lsvec ) THEN
1411* .. U is available as workspace
1412 CALL clacpy( 'U', n, n, a, lda, u, ldu )
1413 DO 3054 p = 1, n
1414 temp1 = sva(iwork(p))
1415 CALL csscal( p, one/temp1, u(1,p), 1 )
1416 3054 CONTINUE
1417 CALL cpocon( 'U', n, u, ldu, one, temp1,
1418 $ cwork(n+1), rwork, ierr )
1419 ELSE
1420 CALL clacpy( 'U', n, n, a, lda, cwork, n )
1421*[] CALL CLACPY( 'U', N, N, A, LDA, CWORK(N+1), N )
1422* Change: here index shifted by N to the left, CWORK(1:N)
1423* not needed for SIGMA only computation
1424 DO 3052 p = 1, n
1425 temp1 = sva(iwork(p))
1426*[] CALL CSSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 )
1427 CALL csscal( p, one/temp1, cwork((p-1)*n+1), 1 )
1428 3052 CONTINUE
1429* .. the columns of R are scaled to have unit Euclidean lengths.
1430*[] CALL CPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1,
1431*[] $ CWORK(N+N*N+1), RWORK, IERR )
1432 CALL cpocon( 'U', n, cwork, n, one, temp1,
1433 $ cwork(n*n+1), rwork, ierr )
1434*
1435 END IF
1436 IF ( temp1 .NE. zero ) THEN
1437 sconda = one / sqrt(temp1)
1438 ELSE
1439 sconda = - one
1440 END IF
1441* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
1442* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
1443 ELSE
1444 sconda = - one
1445 END IF
1446 END IF
1447*
1448 l2pert = l2pert .AND. ( abs( a(1,1)/a(nr,nr) ) .GT. sqrt(big1) )
1449* If there is no violent scaling, artificial perturbation is not needed.
1450*
1451* Phase 3:
1452*
1453 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
1454*
1455* Singular Values only
1456*
1457* .. transpose A(1:NR,1:N)
1458 DO 1946 p = 1, min( n-1, nr )
1459 CALL ccopy( n-p, a(p,p+1), lda, a(p+1,p), 1 )
1460 CALL clacgv( n-p+1, a(p,p), 1 )
1461 1946 CONTINUE
1462 IF ( nr .EQ. n ) a(n,n) = conjg(a(n,n))
1463*
1464* The following two DO-loops introduce small relative perturbation
1465* into the strict upper triangle of the lower triangular matrix.
1466* Small entries below the main diagonal are also changed.
1467* This modification is useful if the computing environment does not
1468* provide/allow FLUSH TO ZERO underflow, for it prevents many
1469* annoying denormalized numbers in case of strongly scaled matrices.
1470* The perturbation is structured so that it does not introduce any
1471* new perturbation of the singular values, and it does not destroy
1472* the job done by the preconditioner.
1473* The licence for this perturbation is in the variable L2PERT, which
1474* should be .FALSE. if FLUSH TO ZERO underflow is active.
1475*
1476 IF ( .NOT. almort ) THEN
1477*
1478 IF ( l2pert ) THEN
1479* XSC = SQRT(SMALL)
1480 xsc = epsln / real(n)
1481 DO 4947 q = 1, nr
1482 ctemp = cmplx(xsc*abs(a(q,q)),zero)
1483 DO 4949 p = 1, n
1484 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1485 $ .OR. ( p .LT. q ) )
1486* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1487 $ a(p,q) = ctemp
1488 4949 CONTINUE
1489 4947 CONTINUE
1490 ELSE
1491 CALL claset( 'U', nr-1,nr-1, czero,czero, a(1,2),lda )
1492 END IF
1493*
1494* .. second preconditioning using the QR factorization
1495*
1496 CALL cgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n, ierr )
1497*
1498* .. and transpose upper to lower triangular
1499 DO 1948 p = 1, nr - 1
1500 CALL ccopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 )
1501 CALL clacgv( nr-p+1, a(p,p), 1 )
1502 1948 CONTINUE
1503*
1504 END IF
1505*
1506* Row-cyclic Jacobi SVD algorithm with column pivoting
1507*
1508* .. again some perturbation (a "background noise") is added
1509* to drown denormals
1510 IF ( l2pert ) THEN
1511* XSC = SQRT(SMALL)
1512 xsc = epsln / real(n)
1513 DO 1947 q = 1, nr
1514 ctemp = cmplx(xsc*abs(a(q,q)),zero)
1515 DO 1949 p = 1, nr
1516 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1517 $ .OR. ( p .LT. q ) )
1518* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1519 $ a(p,q) = ctemp
1520 1949 CONTINUE
1521 1947 CONTINUE
1522 ELSE
1523 CALL claset( 'U', nr-1, nr-1, czero, czero, a(1,2), lda )
1524 END IF
1525*
1526* .. and one-sided Jacobi rotations are started on a lower
1527* triangular matrix (plus perturbation which is ignored in
1528* the part which destroys triangular form (confusing?!))
1529*
1530 CALL cgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,
1531 $ n, v, ldv, cwork, lwork, rwork, lrwork, info )
1532*
1533 scalem = rwork(1)
1534 numrank = nint(rwork(2))
1535*
1536*
1537 ELSE IF ( ( rsvec .AND. ( .NOT. lsvec ) .AND. ( .NOT. jracc ) )
1538 $ .OR.
1539 $ ( jracc .AND. ( .NOT. lsvec ) .AND. ( nr .NE. n ) ) ) THEN
1540*
1541* -> Singular Values and Right Singular Vectors <-
1542*
1543 IF ( almort ) THEN
1544*
1545* .. in this case NR equals N
1546 DO 1998 p = 1, nr
1547 CALL ccopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1548 CALL clacgv( n-p+1, v(p,p), 1 )
1549 1998 CONTINUE
1550 CALL claset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1551*
1552 CALL cgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,
1553 $ cwork, lwork, rwork, lrwork, info )
1554 scalem = rwork(1)
1555 numrank = nint(rwork(2))
1556
1557 ELSE
1558*
1559* .. two more QR factorizations ( one QRF is not enough, two require
1560* accumulated product of Jacobi rotations, three are perfect )
1561*
1562 CALL claset( 'L', nr-1,nr-1, czero, czero, a(2,1), lda )
1563 CALL cgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n, ierr)
1564 CALL clacpy( 'L', nr, nr, a, lda, v, ldv )
1565 CALL claset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1566 CALL cgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1567 $ lwork-2*n, ierr )
1568 DO 8998 p = 1, nr
1569 CALL ccopy( nr-p+1, v(p,p), ldv, v(p,p), 1 )
1570 CALL clacgv( nr-p+1, v(p,p), 1 )
1571 8998 CONTINUE
1572 CALL claset('U', nr-1, nr-1, czero, czero, v(1,2), ldv)
1573*
1574 CALL cgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,
1575 $ ldu, cwork(n+1), lwork-n, rwork, lrwork, info )
1576 scalem = rwork(1)
1577 numrank = nint(rwork(2))
1578 IF ( nr .LT. n ) THEN
1579 CALL claset( 'A',n-nr, nr, czero,czero, v(nr+1,1), ldv )
1580 CALL claset( 'A',nr, n-nr, czero,czero, v(1,nr+1), ldv )
1581 CALL claset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),ldv )
1582 END IF
1583*
1584 CALL cunmlq( 'L', 'C', n, n, nr, a, lda, cwork,
1585 $ v, ldv, cwork(n+1), lwork-n, ierr )
1586*
1587 END IF
1588* .. permute the rows of V
1589* DO 8991 p = 1, N
1590* CALL CCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA )
1591* 8991 CONTINUE
1592* CALL CLACPY( 'All', N, N, A, LDA, V, LDV )
1593 CALL clapmr( .false., n, n, v, ldv, iwork )
1594*
1595 IF ( transp ) THEN
1596 CALL clacpy( 'A', n, n, v, ldv, u, ldu )
1597 END IF
1598*
1599 ELSE IF ( jracc .AND. (.NOT. lsvec) .AND. ( nr.EQ. n ) ) THEN
1600*
1601 CALL claset( 'L', n-1,n-1, czero, czero, a(2,1), lda )
1602*
1603 CALL cgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,
1604 $ cwork, lwork, rwork, lrwork, info )
1605 scalem = rwork(1)
1606 numrank = nint(rwork(2))
1607 CALL clapmr( .false., n, n, v, ldv, iwork )
1608*
1609 ELSE IF ( lsvec .AND. ( .NOT. rsvec ) ) THEN
1610*
1611* .. Singular Values and Left Singular Vectors ..
1612*
1613* .. second preconditioning step to avoid need to accumulate
1614* Jacobi rotations in the Jacobi iterations.
1615 DO 1965 p = 1, nr
1616 CALL ccopy( n-p+1, a(p,p), lda, u(p,p), 1 )
1617 CALL clacgv( n-p+1, u(p,p), 1 )
1618 1965 CONTINUE
1619 CALL claset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1620*
1621 CALL cgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2*n+1),
1622 $ lwork-2*n, ierr )
1623*
1624 DO 1967 p = 1, nr - 1
1625 CALL ccopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 )
1626 CALL clacgv( n-p+1, u(p,p), 1 )
1627 1967 CONTINUE
1628 CALL claset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1629*
1630 CALL cgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,
1631 $ lda, cwork(n+1), lwork-n, rwork, lrwork, info )
1632 scalem = rwork(1)
1633 numrank = nint(rwork(2))
1634*
1635 IF ( nr .LT. m ) THEN
1636 CALL claset( 'A', m-nr, nr,czero, czero, u(nr+1,1), ldu )
1637 IF ( nr .LT. n1 ) THEN
1638 CALL claset( 'A',nr, n1-nr, czero, czero, u(1,nr+1),ldu )
1639 CALL claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),ldu )
1640 END IF
1641 END IF
1642*
1643 CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1644 $ ldu, cwork(n+1), lwork-n, ierr )
1645*
1646 IF ( rowpiv )
1647 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
1648*
1649 DO 1974 p = 1, n1
1650 xsc = one / scnrm2( m, u(1,p), 1 )
1651 CALL csscal( m, xsc, u(1,p), 1 )
1652 1974 CONTINUE
1653*
1654 IF ( transp ) THEN
1655 CALL clacpy( 'A', n, n, u, ldu, v, ldv )
1656 END IF
1657*
1658 ELSE
1659*
1660* .. Full SVD ..
1661*
1662 IF ( .NOT. jracc ) THEN
1663*
1664 IF ( .NOT. almort ) THEN
1665*
1666* Second Preconditioning Step (QRF [with pivoting])
1667* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
1668* equivalent to an LQF CALL. Since in many libraries the QRF
1669* seems to be better optimized than the LQF, we do explicit
1670* transpose and use the QRF. This is subject to changes in an
1671* optimized implementation of CGEJSV.
1672*
1673 DO 1968 p = 1, nr
1674 CALL ccopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1675 CALL clacgv( n-p+1, v(p,p), 1 )
1676 1968 CONTINUE
1677*
1678* .. the following two loops perturb small entries to avoid
1679* denormals in the second QR factorization, where they are
1680* as good as zeros. This is done to avoid painfully slow
1681* computation with denormals. The relative size of the perturbation
1682* is a parameter that can be changed by the implementer.
1683* This perturbation device will be obsolete on machines with
1684* properly implemented arithmetic.
1685* To switch it off, set L2PERT=.FALSE. To remove it from the
1686* code, remove the action under L2PERT=.TRUE., leave the ELSE part.
1687* The following two loops should be blocked and fused with the
1688* transposed copy above.
1689*
1690 IF ( l2pert ) THEN
1691 xsc = sqrt(small)
1692 DO 2969 q = 1, nr
1693 ctemp = cmplx(xsc*abs( v(q,q) ),zero)
1694 DO 2968 p = 1, n
1695 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1696 $ .OR. ( p .LT. q ) )
1697* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
1698 $ v(p,q) = ctemp
1699 IF ( p .LT. q ) v(p,q) = - v(p,q)
1700 2968 CONTINUE
1701 2969 CONTINUE
1702 ELSE
1703 CALL claset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
1704 END IF
1705*
1706* Estimate the row scaled condition number of R1
1707* (If R1 is rectangular, N > NR, then the condition number
1708* of the leading NR x NR submatrix is estimated.)
1709*
1710 CALL clacpy( 'L', nr, nr, v, ldv, cwork(2*n+1), nr )
1711 DO 3950 p = 1, nr
1712 temp1 = scnrm2(nr-p+1,cwork(2*n+(p-1)*nr+p),1)
1713 CALL csscal(nr-p+1,one/temp1,cwork(2*n+(p-1)*nr+p),1)
1714 3950 CONTINUE
1715 CALL cpocon('L',nr,cwork(2*n+1),nr,one,temp1,
1716 $ cwork(2*n+nr*nr+1),rwork,ierr)
1717 condr1 = one / sqrt(temp1)
1718* .. here need a second opinion on the condition number
1719* .. then assume worst case scenario
1720* R1 is OK for inverse <=> CONDR1 .LT. REAL(N)
1721* more conservative <=> CONDR1 .LT. SQRT(REAL(N))
1722*
1723 cond_ok = sqrt(sqrt(real(nr)))
1724*[TP] COND_OK is a tuning parameter.
1725*
1726 IF ( condr1 .LT. cond_ok ) THEN
1727* .. the second QRF without pivoting. Note: in an optimized
1728* implementation, this QRF should be implemented as the QRF
1729* of a lower triangular matrix.
1730* R1^* = Q2 * R2
1731 CALL cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1732 $ lwork-2*n, ierr )
1733*
1734 IF ( l2pert ) THEN
1735 xsc = sqrt(small)/epsln
1736 DO 3959 p = 2, nr
1737 DO 3958 q = 1, p - 1
1738 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1739 $ zero)
1740 IF ( abs(v(q,p)) .LE. temp1 )
1741* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1742 $ v(q,p) = ctemp
1743 3958 CONTINUE
1744 3959 CONTINUE
1745 END IF
1746*
1747 IF ( nr .NE. n )
1748 $ CALL clacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1749* .. save ...
1750*
1751* .. this transposed copy should be better than naive
1752 DO 1969 p = 1, nr - 1
1753 CALL ccopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 )
1754 CALL clacgv(nr-p+1, v(p,p), 1 )
1755 1969 CONTINUE
1756 v(nr,nr)=conjg(v(nr,nr))
1757*
1758 condr2 = condr1
1759*
1760 ELSE
1761*
1762* .. ill-conditioned case: second QRF with pivoting
1763* Note that windowed pivoting would be equally good
1764* numerically, and more run-time efficient. So, in
1765* an optimal implementation, the next call to CGEQP3
1766* should be replaced with eg. CALL CGEQPX (ACM TOMS #782)
1767* with properly (carefully) chosen parameters.
1768*
1769* R1^* * P2 = Q2 * R2
1770 DO 3003 p = 1, nr
1771 iwork(n+p) = 0
1772 3003 CONTINUE
1773 CALL cgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),
1774 $ cwork(2*n+1), lwork-2*n, rwork, ierr )
1775** CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
1776** $ LWORK-2*N, IERR )
1777 IF ( l2pert ) THEN
1778 xsc = sqrt(small)
1779 DO 3969 p = 2, nr
1780 DO 3968 q = 1, p - 1
1781 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1782 $ zero)
1783 IF ( abs(v(q,p)) .LE. temp1 )
1784* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1785 $ v(q,p) = ctemp
1786 3968 CONTINUE
1787 3969 CONTINUE
1788 END IF
1789*
1790 CALL clacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1791*
1792 IF ( l2pert ) THEN
1793 xsc = sqrt(small)
1794 DO 8970 p = 2, nr
1795 DO 8971 q = 1, p - 1
1796 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1797 $ zero)
1798* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) )
1799 v(p,q) = - ctemp
1800 8971 CONTINUE
1801 8970 CONTINUE
1802 ELSE
1803 CALL claset( 'L',nr-1,nr-1,czero,czero,v(2,1),ldv )
1804 END IF
1805* Now, compute R2 = L3 * Q3, the LQ factorization.
1806 CALL cgelqf( nr, nr, v, ldv, cwork(2*n+n*nr+1),
1807 $ cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, ierr )
1808* .. and estimate the condition number
1809 CALL clacpy( 'L',nr,nr,v,ldv,cwork(2*n+n*nr+nr+1),nr )
1810 DO 4950 p = 1, nr
1811 temp1 = scnrm2( p, cwork(2*n+n*nr+nr+p), nr )
1812 CALL csscal( p, one/temp1, cwork(2*n+n*nr+nr+p), nr )
1813 4950 CONTINUE
1814 CALL cpocon( 'L',nr,cwork(2*n+n*nr+nr+1),nr,one,temp1,
1815 $ cwork(2*n+n*nr+nr+nr*nr+1),rwork,ierr )
1816 condr2 = one / sqrt(temp1)
1817*
1818*
1819 IF ( condr2 .GE. cond_ok ) THEN
1820* .. save the Householder vectors used for Q3
1821* (this overwrites the copy of R2, as it will not be
1822* needed in this branch, but it does not overwritte the
1823* Huseholder vectors of Q2.).
1824 CALL clacpy( 'U', nr, nr, v, ldv, cwork(2*n+1), n )
1825* .. and the rest of the information on Q3 is in
1826* WORK(2*N+N*NR+1:2*N+N*NR+N)
1827 END IF
1828*
1829 END IF
1830*
1831 IF ( l2pert ) THEN
1832 xsc = sqrt(small)
1833 DO 4968 q = 2, nr
1834 ctemp = xsc * v(q,q)
1835 DO 4969 p = 1, q - 1
1836* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) )
1837 v(p,q) = - ctemp
1838 4969 CONTINUE
1839 4968 CONTINUE
1840 ELSE
1841 CALL claset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv )
1842 END IF
1843*
1844* Second preconditioning finished; continue with Jacobi SVD
1845* The input matrix is lower trinagular.
1846*
1847* Recover the right singular vectors as solution of a well
1848* conditioned triangular matrix equation.
1849*
1850 IF ( condr1 .LT. cond_ok ) THEN
1851*
1852 CALL cgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,
1853 $ cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,rwork,
1854 $ lrwork, info )
1855 scalem = rwork(1)
1856 numrank = nint(rwork(2))
1857 DO 3970 p = 1, nr
1858 CALL ccopy( nr, v(1,p), 1, u(1,p), 1 )
1859 CALL csscal( nr, sva(p), v(1,p), 1 )
1860 3970 CONTINUE
1861
1862* .. pick the right matrix equation and solve it
1863*
1864 IF ( nr .EQ. n ) THEN
1865* :)) .. best case, R1 is inverted. The solution of this matrix
1866* equation is Q2*V2 = the product of the Jacobi rotations
1867* used in CGESVJ, premultiplied with the orthogonal matrix
1868* from the second QR factorization.
1869 CALL ctrsm('L','U','N','N', nr,nr,cone, a,lda, v,ldv)
1870 ELSE
1871* .. R1 is well conditioned, but non-square. Adjoint of R2
1872* is inverted to get the product of the Jacobi rotations
1873* used in CGESVJ. The Q-factor from the second QR
1874* factorization is then built in explicitly.
1875 CALL ctrsm('L','U','C','N',nr,nr,cone,cwork(2*n+1),
1876 $ n,v,ldv)
1877 IF ( nr .LT. n ) THEN
1878 CALL claset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv)
1879 CALL claset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv)
1880 CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1881 END IF
1882 CALL cunmqr('L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1883 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr)
1884 END IF
1885*
1886 ELSE IF ( condr2 .LT. cond_ok ) THEN
1887*
1888* The matrix R2 is inverted. The solution of the matrix equation
1889* is Q3^* * V3 = the product of the Jacobi rotations (appplied to
1890* the lower triangular L3 from the LQ factorization of
1891* R2=L3*Q3), pre-multiplied with the transposed Q3.
1892 CALL cgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,
1893 $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1894 $ rwork, lrwork, info )
1895 scalem = rwork(1)
1896 numrank = nint(rwork(2))
1897 DO 3870 p = 1, nr
1898 CALL ccopy( nr, v(1,p), 1, u(1,p), 1 )
1899 CALL csscal( nr, sva(p), u(1,p), 1 )
1900 3870 CONTINUE
1901 CALL ctrsm('L','U','N','N',nr,nr,cone,cwork(2*n+1),n,
1902 $ u,ldu)
1903* .. apply the permutation from the second QR factorization
1904 DO 873 q = 1, nr
1905 DO 872 p = 1, nr
1906 cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1907 872 CONTINUE
1908 DO 874 p = 1, nr
1909 u(p,q) = cwork(2*n+n*nr+nr+p)
1910 874 CONTINUE
1911 873 CONTINUE
1912 IF ( nr .LT. n ) THEN
1913 CALL claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
1914 CALL claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
1915 CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1916 END IF
1917 CALL cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1918 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1919 ELSE
1920* Last line of defense.
1921* #:( This is a rather pathological case: no scaled condition
1922* improvement after two pivoted QR factorizations. Other
1923* possibility is that the rank revealing QR factorization
1924* or the condition estimator has failed, or the COND_OK
1925* is set very close to ONE (which is unnecessary). Normally,
1926* this branch should never be executed, but in rare cases of
1927* failure of the RRQR or condition estimator, the last line of
1928* defense ensures that CGEJSV completes the task.
1929* Compute the full SVD of L3 using CGESVJ with explicit
1930* accumulation of Jacobi rotations.
1931 CALL cgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,
1932 $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1933 $ rwork, lrwork, info )
1934 scalem = rwork(1)
1935 numrank = nint(rwork(2))
1936 IF ( nr .LT. n ) THEN
1937 CALL claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
1938 CALL claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
1939 CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1940 END IF
1941 CALL cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1942 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1943*
1944 CALL cunmlq( 'L', 'C', nr, nr, nr, cwork(2*n+1), n,
1945 $ cwork(2*n+n*nr+1), u, ldu, cwork(2*n+n*nr+nr+1),
1946 $ lwork-2*n-n*nr-nr, ierr )
1947 DO 773 q = 1, nr
1948 DO 772 p = 1, nr
1949 cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1950 772 CONTINUE
1951 DO 774 p = 1, nr
1952 u(p,q) = cwork(2*n+n*nr+nr+p)
1953 774 CONTINUE
1954 773 CONTINUE
1955*
1956 END IF
1957*
1958* Permute the rows of V using the (column) permutation from the
1959* first QRF. Also, scale the columns to make them unit in
1960* Euclidean norm. This applies to all cases.
1961*
1962 temp1 = sqrt(real(n)) * epsln
1963 DO 1972 q = 1, n
1964 DO 972 p = 1, n
1965 cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
1966 972 CONTINUE
1967 DO 973 p = 1, n
1968 v(p,q) = cwork(2*n+n*nr+nr+p)
1969 973 CONTINUE
1970 xsc = one / scnrm2( n, v(1,q), 1 )
1971 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1972 $ CALL csscal( n, xsc, v(1,q), 1 )
1973 1972 CONTINUE
1974* At this moment, V contains the right singular vectors of A.
1975* Next, assemble the left singular vector matrix U (M x N).
1976 IF ( nr .LT. m ) THEN
1977 CALL claset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu)
1978 IF ( nr .LT. n1 ) THEN
1979 CALL claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1980 CALL claset('A',m-nr,n1-nr,czero,cone,
1981 $ u(nr+1,nr+1),ldu)
1982 END IF
1983 END IF
1984*
1985* The Q matrix from the first QRF is built into the left singular
1986* matrix U. This applies to all cases.
1987*
1988 CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1989 $ ldu, cwork(n+1), lwork-n, ierr )
1990
1991* The columns of U are normalized. The cost is O(M*N) flops.
1992 temp1 = sqrt(real(m)) * epsln
1993 DO 1973 p = 1, nr
1994 xsc = one / scnrm2( m, u(1,p), 1 )
1995 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1996 $ CALL csscal( m, xsc, u(1,p), 1 )
1997 1973 CONTINUE
1998*
1999* If the initial QRF is computed with row pivoting, the left
2000* singular vectors must be adjusted.
2001*
2002 IF ( rowpiv )
2003 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2004*
2005 ELSE
2006*
2007* .. the initial matrix A has almost orthogonal columns and
2008* the second QRF is not needed
2009*
2010 CALL clacpy( 'U', n, n, a, lda, cwork(n+1), n )
2011 IF ( l2pert ) THEN
2012 xsc = sqrt(small)
2013 DO 5970 p = 2, n
2014 ctemp = xsc * cwork( n + (p-1)*n + p )
2015 DO 5971 q = 1, p - 1
2016* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) /
2017* $ ABS(CWORK(N+(p-1)*N+q)) )
2018 cwork(n+(q-1)*n+p)=-ctemp
2019 5971 CONTINUE
2020 5970 CONTINUE
2021 ELSE
2022 CALL claset( 'L',n-1,n-1,czero,czero,cwork(n+2),n )
2023 END IF
2024*
2025 CALL cgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,
2026 $ n, u, ldu, cwork(n+n*n+1), lwork-n-n*n, rwork, lrwork,
2027 $ info )
2028*
2029 scalem = rwork(1)
2030 numrank = nint(rwork(2))
2031 DO 6970 p = 1, n
2032 CALL ccopy( n, cwork(n+(p-1)*n+1), 1, u(1,p), 1 )
2033 CALL csscal( n, sva(p), cwork(n+(p-1)*n+1), 1 )
2034 6970 CONTINUE
2035*
2036 CALL ctrsm( 'L', 'U', 'N', 'N', n, n,
2037 $ cone, a, lda, cwork(n+1), n )
2038 DO 6972 p = 1, n
2039 CALL ccopy( n, cwork(n+p), n, v(iwork(p),1), ldv )
2040 6972 CONTINUE
2041 temp1 = sqrt(real(n))*epsln
2042 DO 6971 p = 1, n
2043 xsc = one / scnrm2( n, v(1,p), 1 )
2044 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2045 $ CALL csscal( n, xsc, v(1,p), 1 )
2046 6971 CONTINUE
2047*
2048* Assemble the left singular vector matrix U (M x N).
2049*
2050 IF ( n .LT. m ) THEN
2051 CALL claset( 'A', m-n, n, czero, czero, u(n+1,1), ldu )
2052 IF ( n .LT. n1 ) THEN
2053 CALL claset('A',n, n1-n, czero, czero, u(1,n+1),ldu)
2054 CALL claset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),ldu)
2055 END IF
2056 END IF
2057 CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2058 $ ldu, cwork(n+1), lwork-n, ierr )
2059 temp1 = sqrt(real(m))*epsln
2060 DO 6973 p = 1, n1
2061 xsc = one / scnrm2( m, u(1,p), 1 )
2062 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2063 $ CALL csscal( m, xsc, u(1,p), 1 )
2064 6973 CONTINUE
2065*
2066 IF ( rowpiv )
2067 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2068*
2069 END IF
2070*
2071* end of the >> almost orthogonal case << in the full SVD
2072*
2073 ELSE
2074*
2075* This branch deploys a preconditioned Jacobi SVD with explicitly
2076* accumulated rotations. It is included as optional, mainly for
2077* experimental purposes. It does perform well, and can also be used.
2078* In this implementation, this branch will be automatically activated
2079* if the condition number sigma_max(A) / sigma_min(A) is predicted
2080* to be greater than the overflow threshold. This is because the
2081* a posteriori computation of the singular vectors assumes robust
2082* implementation of BLAS and some LAPACK procedures, capable of working
2083* in presence of extreme values, e.g. when the singular values spread from
2084* the underflow to the overflow threshold.
2085*
2086 DO 7968 p = 1, nr
2087 CALL ccopy( n-p+1, a(p,p), lda, v(p,p), 1 )
2088 CALL clacgv( n-p+1, v(p,p), 1 )
2089 7968 CONTINUE
2090*
2091 IF ( l2pert ) THEN
2092 xsc = sqrt(small/epsln)
2093 DO 5969 q = 1, nr
2094 ctemp = cmplx(xsc*abs( v(q,q) ),zero)
2095 DO 5968 p = 1, n
2096 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
2097 $ .OR. ( p .LT. q ) )
2098* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
2099 $ v(p,q) = ctemp
2100 IF ( p .LT. q ) v(p,q) = - v(p,q)
2101 5968 CONTINUE
2102 5969 CONTINUE
2103 ELSE
2104 CALL claset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
2105 END IF
2106
2107 CALL cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
2108 $ lwork-2*n, ierr )
2109 CALL clacpy( 'L', n, nr, v, ldv, cwork(2*n+1), n )
2110*
2111 DO 7969 p = 1, nr
2112 CALL ccopy( nr-p+1, v(p,p), ldv, u(p,p), 1 )
2113 CALL clacgv( nr-p+1, u(p,p), 1 )
2114 7969 CONTINUE
2115
2116 IF ( l2pert ) THEN
2117 xsc = sqrt(small/epsln)
2118 DO 9970 q = 2, nr
2119 DO 9971 p = 1, q - 1
2120 ctemp = cmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),
2121 $ zero)
2122* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) )
2123 u(p,q) = - ctemp
2124 9971 CONTINUE
2125 9970 CONTINUE
2126 ELSE
2127 CALL claset('U', nr-1, nr-1, czero, czero, u(1,2), ldu )
2128 END IF
2129
2130 CALL cgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,
2131 $ n, v, ldv, cwork(2*n+n*nr+1), lwork-2*n-n*nr,
2132 $ rwork, lrwork, info )
2133 scalem = rwork(1)
2134 numrank = nint(rwork(2))
2135
2136 IF ( nr .LT. n ) THEN
2137 CALL claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
2138 CALL claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
2139 CALL claset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv )
2140 END IF
2141
2142 CALL cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
2143 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
2144*
2145* Permute the rows of V using the (column) permutation from the
2146* first QRF. Also, scale the columns to make them unit in
2147* Euclidean norm. This applies to all cases.
2148*
2149 temp1 = sqrt(real(n)) * epsln
2150 DO 7972 q = 1, n
2151 DO 8972 p = 1, n
2152 cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
2153 8972 CONTINUE
2154 DO 8973 p = 1, n
2155 v(p,q) = cwork(2*n+n*nr+nr+p)
2156 8973 CONTINUE
2157 xsc = one / scnrm2( n, v(1,q), 1 )
2158 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2159 $ CALL csscal( n, xsc, v(1,q), 1 )
2160 7972 CONTINUE
2161*
2162* At this moment, V contains the right singular vectors of A.
2163* Next, assemble the left singular vector matrix U (M x N).
2164*
2165 IF ( nr .LT. m ) THEN
2166 CALL claset( 'A', m-nr, nr, czero, czero, u(nr+1,1), ldu )
2167 IF ( nr .LT. n1 ) THEN
2168 CALL claset('A',nr, n1-nr, czero, czero, u(1,nr+1),ldu)
2169 CALL claset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),ldu)
2170 END IF
2171 END IF
2172*
2173 CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2174 $ ldu, cwork(n+1), lwork-n, ierr )
2175*
2176 IF ( rowpiv )
2177 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2178*
2179*
2180 END IF
2181 IF ( transp ) THEN
2182* .. swap U and V because the procedure worked on A^*
2183 DO 6974 p = 1, n
2184 CALL cswap( n, u(1,p), 1, v(1,p), 1 )
2185 6974 CONTINUE
2186 END IF
2187*
2188 END IF
2189* end of the full SVD
2190*
2191* Undo scaling, if necessary (and possible)
2192*
2193 IF ( uscal2 .LE. (big/sva(1))*uscal1 ) THEN
2194 CALL slascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr )
2195 uscal1 = one
2196 uscal2 = one
2197 END IF
2198*
2199 IF ( nr .LT. n ) THEN
2200 DO 3004 p = nr+1, n
2201 sva(p) = zero
2202 3004 CONTINUE
2203 END IF
2204*
2205 rwork(1) = uscal2 * scalem
2206 rwork(2) = uscal1
2207 IF ( errest ) rwork(3) = sconda
2208 IF ( lsvec .AND. rsvec ) THEN
2209 rwork(4) = condr1
2210 rwork(5) = condr2
2211 END IF
2212 IF ( l2tran ) THEN
2213 rwork(6) = entra
2214 rwork(7) = entrat
2215 END IF
2216*
2217 iwork(1) = nr
2218 iwork(2) = numrank
2219 iwork(3) = warning
2220 IF ( transp ) THEN
2221 iwork(4) = 1
2222 ELSE
2223 iwork(4) = -1
2224 END IF
2225
2226*
2227 RETURN
2228* ..
2229* .. END OF CGEJSV
2230* ..
float cmplx[2]
Definition pblas.h:136
subroutine slassq(n, x, incx, scl, sumsq)
SLASSQ updates a sum of squares represented in scaled form.
Definition slassq.f90:137
subroutine classq(n, x, incx, scl, sumsq)
CLASSQ updates a sum of squares represented in scaled form.
Definition classq.f90:137
subroutine slascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition slascl.f:143
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function isamax(n, sx, incx)
ISAMAX
Definition isamax.f:71
integer function icamax(n, cx, incx)
ICAMAX
Definition icamax.f:71
subroutine cgeqrf(m, n, a, lda, tau, work, lwork, info)
CGEQRF
Definition cgeqrf.f:146
subroutine cgeqp3(m, n, a, lda, jpvt, tau, work, lwork, rwork, info)
CGEQP3
Definition cgeqp3.f:159
subroutine cgesvj(joba, jobu, jobv, m, n, a, lda, sva, mv, v, ldv, cwork, lwork, rwork, lrwork, info)
CGESVJ
Definition cgesvj.f:351
subroutine cgelqf(m, n, a, lda, tau, work, lwork, info)
CGELQF
Definition cgelqf.f:143
subroutine clascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition clascl.f:143
subroutine clapmr(forwrd, m, n, x, ldx, k)
CLAPMR rearranges rows of a matrix as specified by a permutation vector.
Definition clapmr.f:104
subroutine clacpy(uplo, m, n, a, lda, b, ldb)
CLACPY copies all or part of one two-dimensional array to another.
Definition clacpy.f:103
subroutine clacgv(n, x, incx)
CLACGV conjugates a complex vector.
Definition clacgv.f:74
subroutine claset(uplo, m, n, alpha, beta, a, lda)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition claset.f:106
subroutine claswp(n, a, lda, k1, k2, ipiv, incx)
CLASWP performs a series of row interchanges on a general rectangular matrix.
Definition claswp.f:115
subroutine cunmqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
CUNMQR
Definition cunmqr.f:168
subroutine cungqr(m, n, k, a, lda, tau, work, lwork, info)
CUNGQR
Definition cungqr.f:128
subroutine cunmlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
CUNMLQ
Definition cunmlq.f:168
subroutine cpocon(uplo, n, a, lda, anorm, rcond, work, rwork, info)
CPOCON
Definition cpocon.f:121
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81
subroutine csscal(n, sa, cx, incx)
CSSCAL
Definition csscal.f:78
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine ctrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
CTRSM
Definition ctrsm.f:180
subroutine sscal(n, sa, sx, incx)
SSCAL
Definition sscal.f:79
real(wp) function scnrm2(n, x, incx)
SCNRM2
Definition scnrm2.f90:90
real function slamch(cmach)
SLAMCH
Definition slamch.f:68
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21

◆ cgesdd()

subroutine cgesdd ( character jobz,
integer m,
integer n,
complex, dimension( lda, * ) a,
integer lda,
real, dimension( * ) s,
complex, dimension( ldu, * ) u,
integer ldu,
complex, dimension( ldvt, * ) vt,
integer ldvt,
complex, dimension( * ) work,
integer lwork,
real, dimension( * ) rwork,
integer, dimension( * ) iwork,
integer info )

CGESDD

Download CGESDD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> CGESDD computes the singular value decomposition (SVD) of a complex
!> M-by-N matrix A, optionally computing the left and/or right singular
!> vectors, by using divide-and-conquer method. The SVD is written
!>
!>      A = U * SIGMA * conjugate-transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
!> V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns VT = V**H, not V.
!>
!> The divide and conquer algorithm makes very mild assumptions about
!> floating point arithmetic. It will work on machines with a guard
!> digit in add/subtract, or on those binary machines without guard
!> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
!> Cray-2. It could conceivably fail on hexadecimal or decimal machines
!> without guard digits, but we know of none.
!> 
Parameters
[in]JOBZ
!>          JOBZ is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'A':  all M columns of U and all N rows of V**H are
!>                  returned in the arrays U and VT;
!>          = 'S':  the first min(M,N) columns of U and the first
!>                  min(M,N) rows of V**H are returned in the arrays U
!>                  and VT;
!>          = 'O':  If M >= N, the first N columns of U are overwritten
!>                  in the array A and all rows of V**H are returned in
!>                  the array VT;
!>                  otherwise, all columns of U are returned in the
!>                  array U and the first M rows of V**H are overwritten
!>                  in the array A;
!>          = 'N':  no columns of U or rows of V**H are computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is COMPLEX array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit,
!>          if JOBZ = 'O',  A is overwritten with the first N columns
!>                          of U (the left singular vectors, stored
!>                          columnwise) if M >= N;
!>                          A is overwritten with the first M rows
!>                          of V**H (the right singular vectors, stored
!>                          rowwise) otherwise.
!>          if JOBZ .ne. 'O', the contents of A are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is REAL array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is COMPLEX array, dimension (LDU,UCOL)
!>          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
!>          UCOL = min(M,N) if JOBZ = 'S'.
!>          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
!>          unitary matrix U;
!>          if JOBZ = 'S', U contains the first min(M,N) columns of U
!>          (the left singular vectors, stored columnwise);
!>          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1;
!>          if JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
!> 
[out]VT
!>          VT is COMPLEX array, dimension (LDVT,N)
!>          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
!>          N-by-N unitary matrix V**H;
!>          if JOBZ = 'S', VT contains the first min(M,N) rows of
!>          V**H (the right singular vectors, stored rowwise);
!>          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1;
!>          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
!>          if JOBZ = 'S', LDVT >= min(M,N).
!> 
[out]WORK
!>          WORK is COMPLEX array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK. LWORK >= 1.
!>          If LWORK = -1, a workspace query is assumed.  The optimal
!>          size for the WORK array is calculated and stored in WORK(1),
!>          and no other work except argument checking is performed.
!>
!>          Let mx = max(M,N) and mn = min(M,N).
!>          If JOBZ = 'N', LWORK >= 2*mn + mx.
!>          If JOBZ = 'O', LWORK >= 2*mn*mn + 2*mn + mx.
!>          If JOBZ = 'S', LWORK >=   mn*mn + 3*mn.
!>          If JOBZ = 'A', LWORK >=   mn*mn + 2*mn + mx.
!>          These are not tight minimums in all cases; see comments inside code.
!>          For good performance, LWORK should generally be larger;
!>          a query is recommended.
!> 
[out]RWORK
!>          RWORK is REAL array, dimension (MAX(1,LRWORK))
!>          Let mx = max(M,N) and mn = min(M,N).
!>          If JOBZ = 'N',    LRWORK >= 5*mn (LAPACK <= 3.6 needs 7*mn);
!>          else if mx >> mn, LRWORK >= 5*mn*mn + 5*mn;
!>          else              LRWORK >= max( 5*mn*mn + 5*mn,
!>                                           2*mx*mn + 2*mn*mn + mn ).
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (8*min(M,N))
!> 
[out]INFO
!>          INFO is INTEGER
!>          <  0:  if INFO = -i, the i-th argument had an illegal value.
!>          = -4:  if A had a NAN entry.
!>          >  0:  The updating process of SBDSDC did not converge.
!>          =  0:  successful exit.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA

Definition at line 225 of file cgesdd.f.

227 implicit none
228*
229* -- LAPACK driver routine --
230* -- LAPACK is a software package provided by Univ. of Tennessee, --
231* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
232*
233* .. Scalar Arguments ..
234 CHARACTER JOBZ
235 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
236* ..
237* .. Array Arguments ..
238 INTEGER IWORK( * )
239 REAL RWORK( * ), S( * )
240 COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
241 $ WORK( * )
242* ..
243*
244* =====================================================================
245*
246* .. Parameters ..
247 COMPLEX CZERO, CONE
248 parameter( czero = ( 0.0e+0, 0.0e+0 ),
249 $ cone = ( 1.0e+0, 0.0e+0 ) )
250 REAL ZERO, ONE
251 parameter( zero = 0.0e+0, one = 1.0e+0 )
252* ..
253* .. Local Scalars ..
254 LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
255 INTEGER BLK, CHUNK, I, IE, IERR, IL, IR, IRU, IRVT,
256 $ ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
257 $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
258 $ MNTHR1, MNTHR2, NRWORK, NWORK, WRKBL
259 INTEGER LWORK_CGEBRD_MN, LWORK_CGEBRD_MM,
260 $ LWORK_CGEBRD_NN, LWORK_CGELQF_MN,
261 $ LWORK_CGEQRF_MN,
262 $ LWORK_CUNGBR_P_MN, LWORK_CUNGBR_P_NN,
263 $ LWORK_CUNGBR_Q_MN, LWORK_CUNGBR_Q_MM,
264 $ LWORK_CUNGLQ_MN, LWORK_CUNGLQ_NN,
265 $ LWORK_CUNGQR_MM, LWORK_CUNGQR_MN,
266 $ LWORK_CUNMBR_PRC_MM, LWORK_CUNMBR_QLN_MM,
267 $ LWORK_CUNMBR_PRC_MN, LWORK_CUNMBR_QLN_MN,
268 $ LWORK_CUNMBR_PRC_NN, LWORK_CUNMBR_QLN_NN
269 REAL ANRM, BIGNUM, EPS, SMLNUM
270* ..
271* .. Local Arrays ..
272 INTEGER IDUM( 1 )
273 REAL DUM( 1 )
274 COMPLEX CDUM( 1 )
275* ..
276* .. External Subroutines ..
277 EXTERNAL cgebrd, cgelqf, cgemm, cgeqrf, clacp2, clacpy,
280* ..
281* .. External Functions ..
282 LOGICAL LSAME, SISNAN
283 REAL SLAMCH, CLANGE, SROUNDUP_LWORK
284 EXTERNAL lsame, slamch, clange, sisnan,
286* ..
287* .. Intrinsic Functions ..
288 INTRINSIC int, max, min, sqrt
289* ..
290* .. Executable Statements ..
291*
292* Test the input arguments
293*
294 info = 0
295 minmn = min( m, n )
296 mnthr1 = int( minmn*17.0e0 / 9.0e0 )
297 mnthr2 = int( minmn*5.0e0 / 3.0e0 )
298 wntqa = lsame( jobz, 'A' )
299 wntqs = lsame( jobz, 'S' )
300 wntqas = wntqa .OR. wntqs
301 wntqo = lsame( jobz, 'O' )
302 wntqn = lsame( jobz, 'N' )
303 lquery = ( lwork.EQ.-1 )
304 minwrk = 1
305 maxwrk = 1
306*
307 IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
308 info = -1
309 ELSE IF( m.LT.0 ) THEN
310 info = -2
311 ELSE IF( n.LT.0 ) THEN
312 info = -3
313 ELSE IF( lda.LT.max( 1, m ) ) THEN
314 info = -5
315 ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
316 $ ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
317 info = -8
318 ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
319 $ ( wntqs .AND. ldvt.LT.minmn ) .OR.
320 $ ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
321 info = -10
322 END IF
323*
324* Compute workspace
325* Note: Comments in the code beginning "Workspace:" describe the
326* minimal amount of workspace allocated at that point in the code,
327* as well as the preferred amount for good performance.
328* CWorkspace refers to complex workspace, and RWorkspace to
329* real workspace. NB refers to the optimal block size for the
330* immediately following subroutine, as returned by ILAENV.)
331*
332 IF( info.EQ.0 ) THEN
333 minwrk = 1
334 maxwrk = 1
335 IF( m.GE.n .AND. minmn.GT.0 ) THEN
336*
337* There is no complex work space needed for bidiagonal SVD
338* The real work space needed for bidiagonal SVD (sbdsdc) is
339* BDSPAC = 3*N*N + 4*N for singular values and vectors;
340* BDSPAC = 4*N for singular values only;
341* not including e, RU, and RVT matrices.
342*
343* Compute space preferred for each routine
344 CALL cgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),
345 $ cdum(1), cdum(1), -1, ierr )
346 lwork_cgebrd_mn = int( cdum(1) )
347*
348 CALL cgebrd( n, n, cdum(1), n, dum(1), dum(1), cdum(1),
349 $ cdum(1), cdum(1), -1, ierr )
350 lwork_cgebrd_nn = int( cdum(1) )
351*
352 CALL cgeqrf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr )
353 lwork_cgeqrf_mn = int( cdum(1) )
354*
355 CALL cungbr( 'P', n, n, n, cdum(1), n, cdum(1), cdum(1),
356 $ -1, ierr )
357 lwork_cungbr_p_nn = int( cdum(1) )
358*
359 CALL cungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),
360 $ -1, ierr )
361 lwork_cungbr_q_mm = int( cdum(1) )
362*
363 CALL cungbr( 'Q', m, n, n, cdum(1), m, cdum(1), cdum(1),
364 $ -1, ierr )
365 lwork_cungbr_q_mn = int( cdum(1) )
366*
367 CALL cungqr( m, m, n, cdum(1), m, cdum(1), cdum(1),
368 $ -1, ierr )
369 lwork_cungqr_mm = int( cdum(1) )
370*
371 CALL cungqr( m, n, n, cdum(1), m, cdum(1), cdum(1),
372 $ -1, ierr )
373 lwork_cungqr_mn = int( cdum(1) )
374*
375 CALL cunmbr( 'P', 'R', 'C', n, n, n, cdum(1), n, cdum(1),
376 $ cdum(1), n, cdum(1), -1, ierr )
377 lwork_cunmbr_prc_nn = int( cdum(1) )
378*
379 CALL cunmbr( 'Q', 'L', 'N', m, m, n, cdum(1), m, cdum(1),
380 $ cdum(1), m, cdum(1), -1, ierr )
381 lwork_cunmbr_qln_mm = int( cdum(1) )
382*
383 CALL cunmbr( 'Q', 'L', 'N', m, n, n, cdum(1), m, cdum(1),
384 $ cdum(1), m, cdum(1), -1, ierr )
385 lwork_cunmbr_qln_mn = int( cdum(1) )
386*
387 CALL cunmbr( 'Q', 'L', 'N', n, n, n, cdum(1), n, cdum(1),
388 $ cdum(1), n, cdum(1), -1, ierr )
389 lwork_cunmbr_qln_nn = int( cdum(1) )
390*
391 IF( m.GE.mnthr1 ) THEN
392 IF( wntqn ) THEN
393*
394* Path 1 (M >> N, JOBZ='N')
395*
396 maxwrk = n + lwork_cgeqrf_mn
397 maxwrk = max( maxwrk, 2*n + lwork_cgebrd_nn )
398 minwrk = 3*n
399 ELSE IF( wntqo ) THEN
400*
401* Path 2 (M >> N, JOBZ='O')
402*
403 wrkbl = n + lwork_cgeqrf_mn
404 wrkbl = max( wrkbl, n + lwork_cungqr_mn )
405 wrkbl = max( wrkbl, 2*n + lwork_cgebrd_nn )
406 wrkbl = max( wrkbl, 2*n + lwork_cunmbr_qln_nn )
407 wrkbl = max( wrkbl, 2*n + lwork_cunmbr_prc_nn )
408 maxwrk = m*n + n*n + wrkbl
409 minwrk = 2*n*n + 3*n
410 ELSE IF( wntqs ) THEN
411*
412* Path 3 (M >> N, JOBZ='S')
413*
414 wrkbl = n + lwork_cgeqrf_mn
415 wrkbl = max( wrkbl, n + lwork_cungqr_mn )
416 wrkbl = max( wrkbl, 2*n + lwork_cgebrd_nn )
417 wrkbl = max( wrkbl, 2*n + lwork_cunmbr_qln_nn )
418 wrkbl = max( wrkbl, 2*n + lwork_cunmbr_prc_nn )
419 maxwrk = n*n + wrkbl
420 minwrk = n*n + 3*n
421 ELSE IF( wntqa ) THEN
422*
423* Path 4 (M >> N, JOBZ='A')
424*
425 wrkbl = n + lwork_cgeqrf_mn
426 wrkbl = max( wrkbl, n + lwork_cungqr_mm )
427 wrkbl = max( wrkbl, 2*n + lwork_cgebrd_nn )
428 wrkbl = max( wrkbl, 2*n + lwork_cunmbr_qln_nn )
429 wrkbl = max( wrkbl, 2*n + lwork_cunmbr_prc_nn )
430 maxwrk = n*n + wrkbl
431 minwrk = n*n + max( 3*n, n + m )
432 END IF
433 ELSE IF( m.GE.mnthr2 ) THEN
434*
435* Path 5 (M >> N, but not as much as MNTHR1)
436*
437 maxwrk = 2*n + lwork_cgebrd_mn
438 minwrk = 2*n + m
439 IF( wntqo ) THEN
440* Path 5o (M >> N, JOBZ='O')
441 maxwrk = max( maxwrk, 2*n + lwork_cungbr_p_nn )
442 maxwrk = max( maxwrk, 2*n + lwork_cungbr_q_mn )
443 maxwrk = maxwrk + m*n
444 minwrk = minwrk + n*n
445 ELSE IF( wntqs ) THEN
446* Path 5s (M >> N, JOBZ='S')
447 maxwrk = max( maxwrk, 2*n + lwork_cungbr_p_nn )
448 maxwrk = max( maxwrk, 2*n + lwork_cungbr_q_mn )
449 ELSE IF( wntqa ) THEN
450* Path 5a (M >> N, JOBZ='A')
451 maxwrk = max( maxwrk, 2*n + lwork_cungbr_p_nn )
452 maxwrk = max( maxwrk, 2*n + lwork_cungbr_q_mm )
453 END IF
454 ELSE
455*
456* Path 6 (M >= N, but not much larger)
457*
458 maxwrk = 2*n + lwork_cgebrd_mn
459 minwrk = 2*n + m
460 IF( wntqo ) THEN
461* Path 6o (M >= N, JOBZ='O')
462 maxwrk = max( maxwrk, 2*n + lwork_cunmbr_prc_nn )
463 maxwrk = max( maxwrk, 2*n + lwork_cunmbr_qln_mn )
464 maxwrk = maxwrk + m*n
465 minwrk = minwrk + n*n
466 ELSE IF( wntqs ) THEN
467* Path 6s (M >= N, JOBZ='S')
468 maxwrk = max( maxwrk, 2*n + lwork_cunmbr_qln_mn )
469 maxwrk = max( maxwrk, 2*n + lwork_cunmbr_prc_nn )
470 ELSE IF( wntqa ) THEN
471* Path 6a (M >= N, JOBZ='A')
472 maxwrk = max( maxwrk, 2*n + lwork_cunmbr_qln_mm )
473 maxwrk = max( maxwrk, 2*n + lwork_cunmbr_prc_nn )
474 END IF
475 END IF
476 ELSE IF( minmn.GT.0 ) THEN
477*
478* There is no complex work space needed for bidiagonal SVD
479* The real work space needed for bidiagonal SVD (sbdsdc) is
480* BDSPAC = 3*M*M + 4*M for singular values and vectors;
481* BDSPAC = 4*M for singular values only;
482* not including e, RU, and RVT matrices.
483*
484* Compute space preferred for each routine
485 CALL cgebrd( m, n, cdum(1), m, dum(1), dum(1), cdum(1),
486 $ cdum(1), cdum(1), -1, ierr )
487 lwork_cgebrd_mn = int( cdum(1) )
488*
489 CALL cgebrd( m, m, cdum(1), m, dum(1), dum(1), cdum(1),
490 $ cdum(1), cdum(1), -1, ierr )
491 lwork_cgebrd_mm = int( cdum(1) )
492*
493 CALL cgelqf( m, n, cdum(1), m, cdum(1), cdum(1), -1, ierr )
494 lwork_cgelqf_mn = int( cdum(1) )
495*
496 CALL cungbr( 'P', m, n, m, cdum(1), m, cdum(1), cdum(1),
497 $ -1, ierr )
498 lwork_cungbr_p_mn = int( cdum(1) )
499*
500 CALL cungbr( 'P', n, n, m, cdum(1), n, cdum(1), cdum(1),
501 $ -1, ierr )
502 lwork_cungbr_p_nn = int( cdum(1) )
503*
504 CALL cungbr( 'Q', m, m, n, cdum(1), m, cdum(1), cdum(1),
505 $ -1, ierr )
506 lwork_cungbr_q_mm = int( cdum(1) )
507*
508 CALL cunglq( m, n, m, cdum(1), m, cdum(1), cdum(1),
509 $ -1, ierr )
510 lwork_cunglq_mn = int( cdum(1) )
511*
512 CALL cunglq( n, n, m, cdum(1), n, cdum(1), cdum(1),
513 $ -1, ierr )
514 lwork_cunglq_nn = int( cdum(1) )
515*
516 CALL cunmbr( 'P', 'R', 'C', m, m, m, cdum(1), m, cdum(1),
517 $ cdum(1), m, cdum(1), -1, ierr )
518 lwork_cunmbr_prc_mm = int( cdum(1) )
519*
520 CALL cunmbr( 'P', 'R', 'C', m, n, m, cdum(1), m, cdum(1),
521 $ cdum(1), m, cdum(1), -1, ierr )
522 lwork_cunmbr_prc_mn = int( cdum(1) )
523*
524 CALL cunmbr( 'P', 'R', 'C', n, n, m, cdum(1), n, cdum(1),
525 $ cdum(1), n, cdum(1), -1, ierr )
526 lwork_cunmbr_prc_nn = int( cdum(1) )
527*
528 CALL cunmbr( 'Q', 'L', 'N', m, m, m, cdum(1), m, cdum(1),
529 $ cdum(1), m, cdum(1), -1, ierr )
530 lwork_cunmbr_qln_mm = int( cdum(1) )
531*
532 IF( n.GE.mnthr1 ) THEN
533 IF( wntqn ) THEN
534*
535* Path 1t (N >> M, JOBZ='N')
536*
537 maxwrk = m + lwork_cgelqf_mn
538 maxwrk = max( maxwrk, 2*m + lwork_cgebrd_mm )
539 minwrk = 3*m
540 ELSE IF( wntqo ) THEN
541*
542* Path 2t (N >> M, JOBZ='O')
543*
544 wrkbl = m + lwork_cgelqf_mn
545 wrkbl = max( wrkbl, m + lwork_cunglq_mn )
546 wrkbl = max( wrkbl, 2*m + lwork_cgebrd_mm )
547 wrkbl = max( wrkbl, 2*m + lwork_cunmbr_qln_mm )
548 wrkbl = max( wrkbl, 2*m + lwork_cunmbr_prc_mm )
549 maxwrk = m*n + m*m + wrkbl
550 minwrk = 2*m*m + 3*m
551 ELSE IF( wntqs ) THEN
552*
553* Path 3t (N >> M, JOBZ='S')
554*
555 wrkbl = m + lwork_cgelqf_mn
556 wrkbl = max( wrkbl, m + lwork_cunglq_mn )
557 wrkbl = max( wrkbl, 2*m + lwork_cgebrd_mm )
558 wrkbl = max( wrkbl, 2*m + lwork_cunmbr_qln_mm )
559 wrkbl = max( wrkbl, 2*m + lwork_cunmbr_prc_mm )
560 maxwrk = m*m + wrkbl
561 minwrk = m*m + 3*m
562 ELSE IF( wntqa ) THEN
563*
564* Path 4t (N >> M, JOBZ='A')
565*
566 wrkbl = m + lwork_cgelqf_mn
567 wrkbl = max( wrkbl, m + lwork_cunglq_nn )
568 wrkbl = max( wrkbl, 2*m + lwork_cgebrd_mm )
569 wrkbl = max( wrkbl, 2*m + lwork_cunmbr_qln_mm )
570 wrkbl = max( wrkbl, 2*m + lwork_cunmbr_prc_mm )
571 maxwrk = m*m + wrkbl
572 minwrk = m*m + max( 3*m, m + n )
573 END IF
574 ELSE IF( n.GE.mnthr2 ) THEN
575*
576* Path 5t (N >> M, but not as much as MNTHR1)
577*
578 maxwrk = 2*m + lwork_cgebrd_mn
579 minwrk = 2*m + n
580 IF( wntqo ) THEN
581* Path 5to (N >> M, JOBZ='O')
582 maxwrk = max( maxwrk, 2*m + lwork_cungbr_q_mm )
583 maxwrk = max( maxwrk, 2*m + lwork_cungbr_p_mn )
584 maxwrk = maxwrk + m*n
585 minwrk = minwrk + m*m
586 ELSE IF( wntqs ) THEN
587* Path 5ts (N >> M, JOBZ='S')
588 maxwrk = max( maxwrk, 2*m + lwork_cungbr_q_mm )
589 maxwrk = max( maxwrk, 2*m + lwork_cungbr_p_mn )
590 ELSE IF( wntqa ) THEN
591* Path 5ta (N >> M, JOBZ='A')
592 maxwrk = max( maxwrk, 2*m + lwork_cungbr_q_mm )
593 maxwrk = max( maxwrk, 2*m + lwork_cungbr_p_nn )
594 END IF
595 ELSE
596*
597* Path 6t (N > M, but not much larger)
598*
599 maxwrk = 2*m + lwork_cgebrd_mn
600 minwrk = 2*m + n
601 IF( wntqo ) THEN
602* Path 6to (N > M, JOBZ='O')
603 maxwrk = max( maxwrk, 2*m + lwork_cunmbr_qln_mm )
604 maxwrk = max( maxwrk, 2*m + lwork_cunmbr_prc_mn )
605 maxwrk = maxwrk + m*n
606 minwrk = minwrk + m*m
607 ELSE IF( wntqs ) THEN
608* Path 6ts (N > M, JOBZ='S')
609 maxwrk = max( maxwrk, 2*m + lwork_cunmbr_qln_mm )
610 maxwrk = max( maxwrk, 2*m + lwork_cunmbr_prc_mn )
611 ELSE IF( wntqa ) THEN
612* Path 6ta (N > M, JOBZ='A')
613 maxwrk = max( maxwrk, 2*m + lwork_cunmbr_qln_mm )
614 maxwrk = max( maxwrk, 2*m + lwork_cunmbr_prc_nn )
615 END IF
616 END IF
617 END IF
618 maxwrk = max( maxwrk, minwrk )
619 END IF
620 IF( info.EQ.0 ) THEN
621 work( 1 ) = sroundup_lwork( maxwrk )
622 IF( lwork.LT.minwrk .AND. .NOT. lquery ) THEN
623 info = -12
624 END IF
625 END IF
626*
627 IF( info.NE.0 ) THEN
628 CALL xerbla( 'CGESDD', -info )
629 RETURN
630 ELSE IF( lquery ) THEN
631 RETURN
632 END IF
633*
634* Quick return if possible
635*
636 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
637 RETURN
638 END IF
639*
640* Get machine constants
641*
642 eps = slamch( 'P' )
643 smlnum = sqrt( slamch( 'S' ) ) / eps
644 bignum = one / smlnum
645*
646* Scale A if max element outside range [SMLNUM,BIGNUM]
647*
648 anrm = clange( 'M', m, n, a, lda, dum )
649 IF( sisnan( anrm ) ) THEN
650 info = -4
651 RETURN
652 END IF
653 iscl = 0
654 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
655 iscl = 1
656 CALL clascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
657 ELSE IF( anrm.GT.bignum ) THEN
658 iscl = 1
659 CALL clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
660 END IF
661*
662 IF( m.GE.n ) THEN
663*
664* A has at least as many rows as columns. If A has sufficiently
665* more rows than columns, first reduce using the QR
666* decomposition (if sufficient workspace available)
667*
668 IF( m.GE.mnthr1 ) THEN
669*
670 IF( wntqn ) THEN
671*
672* Path 1 (M >> N, JOBZ='N')
673* No singular vectors to be computed
674*
675 itau = 1
676 nwork = itau + n
677*
678* Compute A=Q*R
679* CWorkspace: need N [tau] + N [work]
680* CWorkspace: prefer N [tau] + N*NB [work]
681* RWorkspace: need 0
682*
683 CALL cgeqrf( m, n, a, lda, work( itau ), work( nwork ),
684 $ lwork-nwork+1, ierr )
685*
686* Zero out below R
687*
688 CALL claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
689 $ lda )
690 ie = 1
691 itauq = 1
692 itaup = itauq + n
693 nwork = itaup + n
694*
695* Bidiagonalize R in A
696* CWorkspace: need 2*N [tauq, taup] + N [work]
697* CWorkspace: prefer 2*N [tauq, taup] + 2*N*NB [work]
698* RWorkspace: need N [e]
699*
700 CALL cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
701 $ work( itaup ), work( nwork ), lwork-nwork+1,
702 $ ierr )
703 nrwork = ie + n
704*
705* Perform bidiagonal SVD, compute singular values only
706* CWorkspace: need 0
707* RWorkspace: need N [e] + BDSPAC
708*
709 CALL sbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,
710 $ dum, idum, rwork( nrwork ), iwork, info )
711*
712 ELSE IF( wntqo ) THEN
713*
714* Path 2 (M >> N, JOBZ='O')
715* N left singular vectors to be overwritten on A and
716* N right singular vectors to be computed in VT
717*
718 iu = 1
719*
720* WORK(IU) is N by N
721*
722 ldwrku = n
723 ir = iu + ldwrku*n
724 IF( lwork .GE. m*n + n*n + 3*n ) THEN
725*
726* WORK(IR) is M by N
727*
728 ldwrkr = m
729 ELSE
730 ldwrkr = ( lwork - n*n - 3*n ) / n
731 END IF
732 itau = ir + ldwrkr*n
733 nwork = itau + n
734*
735* Compute A=Q*R
736* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work]
737* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work]
738* RWorkspace: need 0
739*
740 CALL cgeqrf( m, n, a, lda, work( itau ), work( nwork ),
741 $ lwork-nwork+1, ierr )
742*
743* Copy R to WORK( IR ), zeroing out below it
744*
745 CALL clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
746 CALL claset( 'L', n-1, n-1, czero, czero, work( ir+1 ),
747 $ ldwrkr )
748*
749* Generate Q in A
750* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work]
751* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work]
752* RWorkspace: need 0
753*
754 CALL cungqr( m, n, n, a, lda, work( itau ),
755 $ work( nwork ), lwork-nwork+1, ierr )
756 ie = 1
757 itauq = itau
758 itaup = itauq + n
759 nwork = itaup + n
760*
761* Bidiagonalize R in WORK(IR)
762* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work]
763* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + 2*N*NB [work]
764* RWorkspace: need N [e]
765*
766 CALL cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
767 $ work( itauq ), work( itaup ), work( nwork ),
768 $ lwork-nwork+1, ierr )
769*
770* Perform bidiagonal SVD, computing left singular vectors
771* of R in WORK(IRU) and computing right singular vectors
772* of R in WORK(IRVT)
773* CWorkspace: need 0
774* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
775*
776 iru = ie + n
777 irvt = iru + n*n
778 nrwork = irvt + n*n
779 CALL sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
780 $ n, rwork( irvt ), n, dum, idum,
781 $ rwork( nrwork ), iwork, info )
782*
783* Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
784* Overwrite WORK(IU) by the left singular vectors of R
785* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work]
786* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work]
787* RWorkspace: need 0
788*
789 CALL clacp2( 'F', n, n, rwork( iru ), n, work( iu ),
790 $ ldwrku )
791 CALL cunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
792 $ work( itauq ), work( iu ), ldwrku,
793 $ work( nwork ), lwork-nwork+1, ierr )
794*
795* Copy real matrix RWORK(IRVT) to complex matrix VT
796* Overwrite VT by the right singular vectors of R
797* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work]
798* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work]
799* RWorkspace: need 0
800*
801 CALL clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
802 CALL cunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,
803 $ work( itaup ), vt, ldvt, work( nwork ),
804 $ lwork-nwork+1, ierr )
805*
806* Multiply Q in A by left singular vectors of R in
807* WORK(IU), storing result in WORK(IR) and copying to A
808* CWorkspace: need N*N [U] + N*N [R]
809* CWorkspace: prefer N*N [U] + M*N [R]
810* RWorkspace: need 0
811*
812 DO 10 i = 1, m, ldwrkr
813 chunk = min( m-i+1, ldwrkr )
814 CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
815 $ lda, work( iu ), ldwrku, czero,
816 $ work( ir ), ldwrkr )
817 CALL clacpy( 'F', chunk, n, work( ir ), ldwrkr,
818 $ a( i, 1 ), lda )
819 10 CONTINUE
820*
821 ELSE IF( wntqs ) THEN
822*
823* Path 3 (M >> N, JOBZ='S')
824* N left singular vectors to be computed in U and
825* N right singular vectors to be computed in VT
826*
827 ir = 1
828*
829* WORK(IR) is N by N
830*
831 ldwrkr = n
832 itau = ir + ldwrkr*n
833 nwork = itau + n
834*
835* Compute A=Q*R
836* CWorkspace: need N*N [R] + N [tau] + N [work]
837* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work]
838* RWorkspace: need 0
839*
840 CALL cgeqrf( m, n, a, lda, work( itau ), work( nwork ),
841 $ lwork-nwork+1, ierr )
842*
843* Copy R to WORK(IR), zeroing out below it
844*
845 CALL clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
846 CALL claset( 'L', n-1, n-1, czero, czero, work( ir+1 ),
847 $ ldwrkr )
848*
849* Generate Q in A
850* CWorkspace: need N*N [R] + N [tau] + N [work]
851* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work]
852* RWorkspace: need 0
853*
854 CALL cungqr( m, n, n, a, lda, work( itau ),
855 $ work( nwork ), lwork-nwork+1, ierr )
856 ie = 1
857 itauq = itau
858 itaup = itauq + n
859 nwork = itaup + n
860*
861* Bidiagonalize R in WORK(IR)
862* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work]
863* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + 2*N*NB [work]
864* RWorkspace: need N [e]
865*
866 CALL cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
867 $ work( itauq ), work( itaup ), work( nwork ),
868 $ lwork-nwork+1, ierr )
869*
870* Perform bidiagonal SVD, computing left singular vectors
871* of bidiagonal matrix in RWORK(IRU) and computing right
872* singular vectors of bidiagonal matrix in RWORK(IRVT)
873* CWorkspace: need 0
874* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
875*
876 iru = ie + n
877 irvt = iru + n*n
878 nrwork = irvt + n*n
879 CALL sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
880 $ n, rwork( irvt ), n, dum, idum,
881 $ rwork( nrwork ), iwork, info )
882*
883* Copy real matrix RWORK(IRU) to complex matrix U
884* Overwrite U by left singular vectors of R
885* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work]
886* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work]
887* RWorkspace: need 0
888*
889 CALL clacp2( 'F', n, n, rwork( iru ), n, u, ldu )
890 CALL cunmbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
891 $ work( itauq ), u, ldu, work( nwork ),
892 $ lwork-nwork+1, ierr )
893*
894* Copy real matrix RWORK(IRVT) to complex matrix VT
895* Overwrite VT by right singular vectors of R
896* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work]
897* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work]
898* RWorkspace: need 0
899*
900 CALL clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
901 CALL cunmbr( 'P', 'R', 'C', n, n, n, work( ir ), ldwrkr,
902 $ work( itaup ), vt, ldvt, work( nwork ),
903 $ lwork-nwork+1, ierr )
904*
905* Multiply Q in A by left singular vectors of R in
906* WORK(IR), storing result in U
907* CWorkspace: need N*N [R]
908* RWorkspace: need 0
909*
910 CALL clacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
911 CALL cgemm( 'N', 'N', m, n, n, cone, a, lda, work( ir ),
912 $ ldwrkr, czero, u, ldu )
913*
914 ELSE IF( wntqa ) THEN
915*
916* Path 4 (M >> N, JOBZ='A')
917* M left singular vectors to be computed in U and
918* N right singular vectors to be computed in VT
919*
920 iu = 1
921*
922* WORK(IU) is N by N
923*
924 ldwrku = n
925 itau = iu + ldwrku*n
926 nwork = itau + n
927*
928* Compute A=Q*R, copying result to U
929* CWorkspace: need N*N [U] + N [tau] + N [work]
930* CWorkspace: prefer N*N [U] + N [tau] + N*NB [work]
931* RWorkspace: need 0
932*
933 CALL cgeqrf( m, n, a, lda, work( itau ), work( nwork ),
934 $ lwork-nwork+1, ierr )
935 CALL clacpy( 'L', m, n, a, lda, u, ldu )
936*
937* Generate Q in U
938* CWorkspace: need N*N [U] + N [tau] + M [work]
939* CWorkspace: prefer N*N [U] + N [tau] + M*NB [work]
940* RWorkspace: need 0
941*
942 CALL cungqr( m, m, n, u, ldu, work( itau ),
943 $ work( nwork ), lwork-nwork+1, ierr )
944*
945* Produce R in A, zeroing out below it
946*
947 CALL claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
948 $ lda )
949 ie = 1
950 itauq = itau
951 itaup = itauq + n
952 nwork = itaup + n
953*
954* Bidiagonalize R in A
955* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work]
956* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + 2*N*NB [work]
957* RWorkspace: need N [e]
958*
959 CALL cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
960 $ work( itaup ), work( nwork ), lwork-nwork+1,
961 $ ierr )
962 iru = ie + n
963 irvt = iru + n*n
964 nrwork = irvt + n*n
965*
966* Perform bidiagonal SVD, computing left singular vectors
967* of bidiagonal matrix in RWORK(IRU) and computing right
968* singular vectors of bidiagonal matrix in RWORK(IRVT)
969* CWorkspace: need 0
970* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
971*
972 CALL sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
973 $ n, rwork( irvt ), n, dum, idum,
974 $ rwork( nrwork ), iwork, info )
975*
976* Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
977* Overwrite WORK(IU) by left singular vectors of R
978* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work]
979* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work]
980* RWorkspace: need 0
981*
982 CALL clacp2( 'F', n, n, rwork( iru ), n, work( iu ),
983 $ ldwrku )
984 CALL cunmbr( 'Q', 'L', 'N', n, n, n, a, lda,
985 $ work( itauq ), work( iu ), ldwrku,
986 $ work( nwork ), lwork-nwork+1, ierr )
987*
988* Copy real matrix RWORK(IRVT) to complex matrix VT
989* Overwrite VT by right singular vectors of R
990* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work]
991* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work]
992* RWorkspace: need 0
993*
994 CALL clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
995 CALL cunmbr( 'P', 'R', 'C', n, n, n, a, lda,
996 $ work( itaup ), vt, ldvt, work( nwork ),
997 $ lwork-nwork+1, ierr )
998*
999* Multiply Q in U by left singular vectors of R in
1000* WORK(IU), storing result in A
1001* CWorkspace: need N*N [U]
1002* RWorkspace: need 0
1003*
1004 CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu, work( iu ),
1005 $ ldwrku, czero, a, lda )
1006*
1007* Copy left singular vectors of A from A to U
1008*
1009 CALL clacpy( 'F', m, n, a, lda, u, ldu )
1010*
1011 END IF
1012*
1013 ELSE IF( m.GE.mnthr2 ) THEN
1014*
1015* MNTHR2 <= M < MNTHR1
1016*
1017* Path 5 (M >> N, but not as much as MNTHR1)
1018* Reduce to bidiagonal form without QR decomposition, use
1019* CUNGBR and matrix multiplication to compute singular vectors
1020*
1021 ie = 1
1022 nrwork = ie + n
1023 itauq = 1
1024 itaup = itauq + n
1025 nwork = itaup + n
1026*
1027* Bidiagonalize A
1028* CWorkspace: need 2*N [tauq, taup] + M [work]
1029* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work]
1030* RWorkspace: need N [e]
1031*
1032 CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
1033 $ work( itaup ), work( nwork ), lwork-nwork+1,
1034 $ ierr )
1035 IF( wntqn ) THEN
1036*
1037* Path 5n (M >> N, JOBZ='N')
1038* Compute singular values only
1039* CWorkspace: need 0
1040* RWorkspace: need N [e] + BDSPAC
1041*
1042 CALL sbdsdc( 'U', 'N', n, s, rwork( ie ), dum, 1,dum,1,
1043 $ dum, idum, rwork( nrwork ), iwork, info )
1044 ELSE IF( wntqo ) THEN
1045 iu = nwork
1046 iru = nrwork
1047 irvt = iru + n*n
1048 nrwork = irvt + n*n
1049*
1050* Path 5o (M >> N, JOBZ='O')
1051* Copy A to VT, generate P**H
1052* CWorkspace: need 2*N [tauq, taup] + N [work]
1053* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1054* RWorkspace: need 0
1055*
1056 CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
1057 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1058 $ work( nwork ), lwork-nwork+1, ierr )
1059*
1060* Generate Q in A
1061* CWorkspace: need 2*N [tauq, taup] + N [work]
1062* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1063* RWorkspace: need 0
1064*
1065 CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
1066 $ work( nwork ), lwork-nwork+1, ierr )
1067*
1068 IF( lwork .GE. m*n + 3*n ) THEN
1069*
1070* WORK( IU ) is M by N
1071*
1072 ldwrku = m
1073 ELSE
1074*
1075* WORK(IU) is LDWRKU by N
1076*
1077 ldwrku = ( lwork - 3*n ) / n
1078 END IF
1079 nwork = iu + ldwrku*n
1080*
1081* Perform bidiagonal SVD, computing left singular vectors
1082* of bidiagonal matrix in RWORK(IRU) and computing right
1083* singular vectors of bidiagonal matrix in RWORK(IRVT)
1084* CWorkspace: need 0
1085* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1086*
1087 CALL sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1088 $ n, rwork( irvt ), n, dum, idum,
1089 $ rwork( nrwork ), iwork, info )
1090*
1091* Multiply real matrix RWORK(IRVT) by P**H in VT,
1092* storing the result in WORK(IU), copying to VT
1093* CWorkspace: need 2*N [tauq, taup] + N*N [U]
1094* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork]
1095*
1096 CALL clarcm( n, n, rwork( irvt ), n, vt, ldvt,
1097 $ work( iu ), ldwrku, rwork( nrwork ) )
1098 CALL clacpy( 'F', n, n, work( iu ), ldwrku, vt, ldvt )
1099*
1100* Multiply Q in A by real matrix RWORK(IRU), storing the
1101* result in WORK(IU), copying to A
1102* CWorkspace: need 2*N [tauq, taup] + N*N [U]
1103* CWorkspace: prefer 2*N [tauq, taup] + M*N [U]
1104* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork]
1105* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here
1106*
1107 nrwork = irvt
1108 DO 20 i = 1, m, ldwrku
1109 chunk = min( m-i+1, ldwrku )
1110 CALL clacrm( chunk, n, a( i, 1 ), lda, rwork( iru ),
1111 $ n, work( iu ), ldwrku, rwork( nrwork ) )
1112 CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
1113 $ a( i, 1 ), lda )
1114 20 CONTINUE
1115*
1116 ELSE IF( wntqs ) THEN
1117*
1118* Path 5s (M >> N, JOBZ='S')
1119* Copy A to VT, generate P**H
1120* CWorkspace: need 2*N [tauq, taup] + N [work]
1121* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1122* RWorkspace: need 0
1123*
1124 CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
1125 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1126 $ work( nwork ), lwork-nwork+1, ierr )
1127*
1128* Copy A to U, generate Q
1129* CWorkspace: need 2*N [tauq, taup] + N [work]
1130* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1131* RWorkspace: need 0
1132*
1133 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1134 CALL cungbr( 'Q', m, n, n, u, ldu, work( itauq ),
1135 $ work( nwork ), lwork-nwork+1, ierr )
1136*
1137* Perform bidiagonal SVD, computing left singular vectors
1138* of bidiagonal matrix in RWORK(IRU) and computing right
1139* singular vectors of bidiagonal matrix in RWORK(IRVT)
1140* CWorkspace: need 0
1141* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1142*
1143 iru = nrwork
1144 irvt = iru + n*n
1145 nrwork = irvt + n*n
1146 CALL sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1147 $ n, rwork( irvt ), n, dum, idum,
1148 $ rwork( nrwork ), iwork, info )
1149*
1150* Multiply real matrix RWORK(IRVT) by P**H in VT,
1151* storing the result in A, copying to VT
1152* CWorkspace: need 0
1153* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork]
1154*
1155 CALL clarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,
1156 $ rwork( nrwork ) )
1157 CALL clacpy( 'F', n, n, a, lda, vt, ldvt )
1158*
1159* Multiply Q in U by real matrix RWORK(IRU), storing the
1160* result in A, copying to U
1161* CWorkspace: need 0
1162* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here
1163*
1164 nrwork = irvt
1165 CALL clacrm( m, n, u, ldu, rwork( iru ), n, a, lda,
1166 $ rwork( nrwork ) )
1167 CALL clacpy( 'F', m, n, a, lda, u, ldu )
1168 ELSE
1169*
1170* Path 5a (M >> N, JOBZ='A')
1171* Copy A to VT, generate P**H
1172* CWorkspace: need 2*N [tauq, taup] + N [work]
1173* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1174* RWorkspace: need 0
1175*
1176 CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
1177 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1178 $ work( nwork ), lwork-nwork+1, ierr )
1179*
1180* Copy A to U, generate Q
1181* CWorkspace: need 2*N [tauq, taup] + M [work]
1182* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work]
1183* RWorkspace: need 0
1184*
1185 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1186 CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1187 $ work( nwork ), lwork-nwork+1, ierr )
1188*
1189* Perform bidiagonal SVD, computing left singular vectors
1190* of bidiagonal matrix in RWORK(IRU) and computing right
1191* singular vectors of bidiagonal matrix in RWORK(IRVT)
1192* CWorkspace: need 0
1193* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1194*
1195 iru = nrwork
1196 irvt = iru + n*n
1197 nrwork = irvt + n*n
1198 CALL sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1199 $ n, rwork( irvt ), n, dum, idum,
1200 $ rwork( nrwork ), iwork, info )
1201*
1202* Multiply real matrix RWORK(IRVT) by P**H in VT,
1203* storing the result in A, copying to VT
1204* CWorkspace: need 0
1205* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork]
1206*
1207 CALL clarcm( n, n, rwork( irvt ), n, vt, ldvt, a, lda,
1208 $ rwork( nrwork ) )
1209 CALL clacpy( 'F', n, n, a, lda, vt, ldvt )
1210*
1211* Multiply Q in U by real matrix RWORK(IRU), storing the
1212* result in A, copying to U
1213* CWorkspace: need 0
1214* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here
1215*
1216 nrwork = irvt
1217 CALL clacrm( m, n, u, ldu, rwork( iru ), n, a, lda,
1218 $ rwork( nrwork ) )
1219 CALL clacpy( 'F', m, n, a, lda, u, ldu )
1220 END IF
1221*
1222 ELSE
1223*
1224* M .LT. MNTHR2
1225*
1226* Path 6 (M >= N, but not much larger)
1227* Reduce to bidiagonal form without QR decomposition
1228* Use CUNMBR to compute singular vectors
1229*
1230 ie = 1
1231 nrwork = ie + n
1232 itauq = 1
1233 itaup = itauq + n
1234 nwork = itaup + n
1235*
1236* Bidiagonalize A
1237* CWorkspace: need 2*N [tauq, taup] + M [work]
1238* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work]
1239* RWorkspace: need N [e]
1240*
1241 CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
1242 $ work( itaup ), work( nwork ), lwork-nwork+1,
1243 $ ierr )
1244 IF( wntqn ) THEN
1245*
1246* Path 6n (M >= N, JOBZ='N')
1247* Compute singular values only
1248* CWorkspace: need 0
1249* RWorkspace: need N [e] + BDSPAC
1250*
1251 CALL sbdsdc( 'U', 'N', n, s, rwork( ie ), dum,1,dum,1,
1252 $ dum, idum, rwork( nrwork ), iwork, info )
1253 ELSE IF( wntqo ) THEN
1254 iu = nwork
1255 iru = nrwork
1256 irvt = iru + n*n
1257 nrwork = irvt + n*n
1258 IF( lwork .GE. m*n + 3*n ) THEN
1259*
1260* WORK( IU ) is M by N
1261*
1262 ldwrku = m
1263 ELSE
1264*
1265* WORK( IU ) is LDWRKU by N
1266*
1267 ldwrku = ( lwork - 3*n ) / n
1268 END IF
1269 nwork = iu + ldwrku*n
1270*
1271* Path 6o (M >= N, JOBZ='O')
1272* Perform bidiagonal SVD, computing left singular vectors
1273* of bidiagonal matrix in RWORK(IRU) and computing right
1274* singular vectors of bidiagonal matrix in RWORK(IRVT)
1275* CWorkspace: need 0
1276* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1277*
1278 CALL sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1279 $ n, rwork( irvt ), n, dum, idum,
1280 $ rwork( nrwork ), iwork, info )
1281*
1282* Copy real matrix RWORK(IRVT) to complex matrix VT
1283* Overwrite VT by right singular vectors of A
1284* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work]
1285* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work]
1286* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1287*
1288 CALL clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
1289 CALL cunmbr( 'P', 'R', 'C', n, n, n, a, lda,
1290 $ work( itaup ), vt, ldvt, work( nwork ),
1291 $ lwork-nwork+1, ierr )
1292*
1293 IF( lwork .GE. m*n + 3*n ) THEN
1294*
1295* Path 6o-fast
1296* Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
1297* Overwrite WORK(IU) by left singular vectors of A, copying
1298* to A
1299* CWorkspace: need 2*N [tauq, taup] + M*N [U] + N [work]
1300* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] + N*NB [work]
1301* RWorkspace: need N [e] + N*N [RU]
1302*
1303 CALL claset( 'F', m, n, czero, czero, work( iu ),
1304 $ ldwrku )
1305 CALL clacp2( 'F', n, n, rwork( iru ), n, work( iu ),
1306 $ ldwrku )
1307 CALL cunmbr( 'Q', 'L', 'N', m, n, n, a, lda,
1308 $ work( itauq ), work( iu ), ldwrku,
1309 $ work( nwork ), lwork-nwork+1, ierr )
1310 CALL clacpy( 'F', m, n, work( iu ), ldwrku, a, lda )
1311 ELSE
1312*
1313* Path 6o-slow
1314* Generate Q in A
1315* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work]
1316* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work]
1317* RWorkspace: need 0
1318*
1319 CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
1320 $ work( nwork ), lwork-nwork+1, ierr )
1321*
1322* Multiply Q in A by real matrix RWORK(IRU), storing the
1323* result in WORK(IU), copying to A
1324* CWorkspace: need 2*N [tauq, taup] + N*N [U]
1325* CWorkspace: prefer 2*N [tauq, taup] + M*N [U]
1326* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork]
1327* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here
1328*
1329 nrwork = irvt
1330 DO 30 i = 1, m, ldwrku
1331 chunk = min( m-i+1, ldwrku )
1332 CALL clacrm( chunk, n, a( i, 1 ), lda,
1333 $ rwork( iru ), n, work( iu ), ldwrku,
1334 $ rwork( nrwork ) )
1335 CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
1336 $ a( i, 1 ), lda )
1337 30 CONTINUE
1338 END IF
1339*
1340 ELSE IF( wntqs ) THEN
1341*
1342* Path 6s (M >= N, JOBZ='S')
1343* Perform bidiagonal SVD, computing left singular vectors
1344* of bidiagonal matrix in RWORK(IRU) and computing right
1345* singular vectors of bidiagonal matrix in RWORK(IRVT)
1346* CWorkspace: need 0
1347* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1348*
1349 iru = nrwork
1350 irvt = iru + n*n
1351 nrwork = irvt + n*n
1352 CALL sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1353 $ n, rwork( irvt ), n, dum, idum,
1354 $ rwork( nrwork ), iwork, info )
1355*
1356* Copy real matrix RWORK(IRU) to complex matrix U
1357* Overwrite U by left singular vectors of A
1358* CWorkspace: need 2*N [tauq, taup] + N [work]
1359* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1360* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1361*
1362 CALL claset( 'F', m, n, czero, czero, u, ldu )
1363 CALL clacp2( 'F', n, n, rwork( iru ), n, u, ldu )
1364 CALL cunmbr( 'Q', 'L', 'N', m, n, n, a, lda,
1365 $ work( itauq ), u, ldu, work( nwork ),
1366 $ lwork-nwork+1, ierr )
1367*
1368* Copy real matrix RWORK(IRVT) to complex matrix VT
1369* Overwrite VT by right singular vectors of A
1370* CWorkspace: need 2*N [tauq, taup] + N [work]
1371* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1372* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1373*
1374 CALL clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
1375 CALL cunmbr( 'P', 'R', 'C', n, n, n, a, lda,
1376 $ work( itaup ), vt, ldvt, work( nwork ),
1377 $ lwork-nwork+1, ierr )
1378 ELSE
1379*
1380* Path 6a (M >= N, JOBZ='A')
1381* Perform bidiagonal SVD, computing left singular vectors
1382* of bidiagonal matrix in RWORK(IRU) and computing right
1383* singular vectors of bidiagonal matrix in RWORK(IRVT)
1384* CWorkspace: need 0
1385* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC
1386*
1387 iru = nrwork
1388 irvt = iru + n*n
1389 nrwork = irvt + n*n
1390 CALL sbdsdc( 'U', 'I', n, s, rwork( ie ), rwork( iru ),
1391 $ n, rwork( irvt ), n, dum, idum,
1392 $ rwork( nrwork ), iwork, info )
1393*
1394* Set the right corner of U to identity matrix
1395*
1396 CALL claset( 'F', m, m, czero, czero, u, ldu )
1397 IF( m.GT.n ) THEN
1398 CALL claset( 'F', m-n, m-n, czero, cone,
1399 $ u( n+1, n+1 ), ldu )
1400 END IF
1401*
1402* Copy real matrix RWORK(IRU) to complex matrix U
1403* Overwrite U by left singular vectors of A
1404* CWorkspace: need 2*N [tauq, taup] + M [work]
1405* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work]
1406* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1407*
1408 CALL clacp2( 'F', n, n, rwork( iru ), n, u, ldu )
1409 CALL cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
1410 $ work( itauq ), u, ldu, work( nwork ),
1411 $ lwork-nwork+1, ierr )
1412*
1413* Copy real matrix RWORK(IRVT) to complex matrix VT
1414* Overwrite VT by right singular vectors of A
1415* CWorkspace: need 2*N [tauq, taup] + N [work]
1416* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
1417* RWorkspace: need N [e] + N*N [RU] + N*N [RVT]
1418*
1419 CALL clacp2( 'F', n, n, rwork( irvt ), n, vt, ldvt )
1420 CALL cunmbr( 'P', 'R', 'C', n, n, n, a, lda,
1421 $ work( itaup ), vt, ldvt, work( nwork ),
1422 $ lwork-nwork+1, ierr )
1423 END IF
1424*
1425 END IF
1426*
1427 ELSE
1428*
1429* A has more columns than rows. If A has sufficiently more
1430* columns than rows, first reduce using the LQ decomposition (if
1431* sufficient workspace available)
1432*
1433 IF( n.GE.mnthr1 ) THEN
1434*
1435 IF( wntqn ) THEN
1436*
1437* Path 1t (N >> M, JOBZ='N')
1438* No singular vectors to be computed
1439*
1440 itau = 1
1441 nwork = itau + m
1442*
1443* Compute A=L*Q
1444* CWorkspace: need M [tau] + M [work]
1445* CWorkspace: prefer M [tau] + M*NB [work]
1446* RWorkspace: need 0
1447*
1448 CALL cgelqf( m, n, a, lda, work( itau ), work( nwork ),
1449 $ lwork-nwork+1, ierr )
1450*
1451* Zero out above L
1452*
1453 CALL claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
1454 $ lda )
1455 ie = 1
1456 itauq = 1
1457 itaup = itauq + m
1458 nwork = itaup + m
1459*
1460* Bidiagonalize L in A
1461* CWorkspace: need 2*M [tauq, taup] + M [work]
1462* CWorkspace: prefer 2*M [tauq, taup] + 2*M*NB [work]
1463* RWorkspace: need M [e]
1464*
1465 CALL cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
1466 $ work( itaup ), work( nwork ), lwork-nwork+1,
1467 $ ierr )
1468 nrwork = ie + m
1469*
1470* Perform bidiagonal SVD, compute singular values only
1471* CWorkspace: need 0
1472* RWorkspace: need M [e] + BDSPAC
1473*
1474 CALL sbdsdc( 'U', 'N', m, s, rwork( ie ), dum,1,dum,1,
1475 $ dum, idum, rwork( nrwork ), iwork, info )
1476*
1477 ELSE IF( wntqo ) THEN
1478*
1479* Path 2t (N >> M, JOBZ='O')
1480* M right singular vectors to be overwritten on A and
1481* M left singular vectors to be computed in U
1482*
1483 ivt = 1
1484 ldwkvt = m
1485*
1486* WORK(IVT) is M by M
1487*
1488 il = ivt + ldwkvt*m
1489 IF( lwork .GE. m*n + m*m + 3*m ) THEN
1490*
1491* WORK(IL) M by N
1492*
1493 ldwrkl = m
1494 chunk = n
1495 ELSE
1496*
1497* WORK(IL) is M by CHUNK
1498*
1499 ldwrkl = m
1500 chunk = ( lwork - m*m - 3*m ) / m
1501 END IF
1502 itau = il + ldwrkl*chunk
1503 nwork = itau + m
1504*
1505* Compute A=L*Q
1506* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1507* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1508* RWorkspace: need 0
1509*
1510 CALL cgelqf( m, n, a, lda, work( itau ), work( nwork ),
1511 $ lwork-nwork+1, ierr )
1512*
1513* Copy L to WORK(IL), zeroing about above it
1514*
1515 CALL clacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1516 CALL claset( 'U', m-1, m-1, czero, czero,
1517 $ work( il+ldwrkl ), ldwrkl )
1518*
1519* Generate Q in A
1520* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1521* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1522* RWorkspace: need 0
1523*
1524 CALL cunglq( m, n, m, a, lda, work( itau ),
1525 $ work( nwork ), lwork-nwork+1, ierr )
1526 ie = 1
1527 itauq = itau
1528 itaup = itauq + m
1529 nwork = itaup + m
1530*
1531* Bidiagonalize L in WORK(IL)
1532* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work]
1533* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + 2*M*NB [work]
1534* RWorkspace: need M [e]
1535*
1536 CALL cgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),
1537 $ work( itauq ), work( itaup ), work( nwork ),
1538 $ lwork-nwork+1, ierr )
1539*
1540* Perform bidiagonal SVD, computing left singular vectors
1541* of bidiagonal matrix in RWORK(IRU) and computing right
1542* singular vectors of bidiagonal matrix in RWORK(IRVT)
1543* CWorkspace: need 0
1544* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC
1545*
1546 iru = ie + m
1547 irvt = iru + m*m
1548 nrwork = irvt + m*m
1549 CALL sbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),
1550 $ m, rwork( irvt ), m, dum, idum,
1551 $ rwork( nrwork ), iwork, info )
1552*
1553* Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
1554* Overwrite WORK(IU) by the left singular vectors of L
1555* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work]
1556* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work]
1557* RWorkspace: need 0
1558*
1559 CALL clacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1560 CALL cunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1561 $ work( itauq ), u, ldu, work( nwork ),
1562 $ lwork-nwork+1, ierr )
1563*
1564* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
1565* Overwrite WORK(IVT) by the right singular vectors of L
1566* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work]
1567* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work]
1568* RWorkspace: need 0
1569*
1570 CALL clacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),
1571 $ ldwkvt )
1572 CALL cunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,
1573 $ work( itaup ), work( ivt ), ldwkvt,
1574 $ work( nwork ), lwork-nwork+1, ierr )
1575*
1576* Multiply right singular vectors of L in WORK(IL) by Q
1577* in A, storing result in WORK(IL) and copying to A
1578* CWorkspace: need M*M [VT] + M*M [L]
1579* CWorkspace: prefer M*M [VT] + M*N [L]
1580* RWorkspace: need 0
1581*
1582 DO 40 i = 1, n, chunk
1583 blk = min( n-i+1, chunk )
1584 CALL cgemm( 'N', 'N', m, blk, m, cone, work( ivt ), m,
1585 $ a( 1, i ), lda, czero, work( il ),
1586 $ ldwrkl )
1587 CALL clacpy( 'F', m, blk, work( il ), ldwrkl,
1588 $ a( 1, i ), lda )
1589 40 CONTINUE
1590*
1591 ELSE IF( wntqs ) THEN
1592*
1593* Path 3t (N >> M, JOBZ='S')
1594* M right singular vectors to be computed in VT and
1595* M left singular vectors to be computed in U
1596*
1597 il = 1
1598*
1599* WORK(IL) is M by M
1600*
1601 ldwrkl = m
1602 itau = il + ldwrkl*m
1603 nwork = itau + m
1604*
1605* Compute A=L*Q
1606* CWorkspace: need M*M [L] + M [tau] + M [work]
1607* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work]
1608* RWorkspace: need 0
1609*
1610 CALL cgelqf( m, n, a, lda, work( itau ), work( nwork ),
1611 $ lwork-nwork+1, ierr )
1612*
1613* Copy L to WORK(IL), zeroing out above it
1614*
1615 CALL clacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1616 CALL claset( 'U', m-1, m-1, czero, czero,
1617 $ work( il+ldwrkl ), ldwrkl )
1618*
1619* Generate Q in A
1620* CWorkspace: need M*M [L] + M [tau] + M [work]
1621* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work]
1622* RWorkspace: need 0
1623*
1624 CALL cunglq( m, n, m, a, lda, work( itau ),
1625 $ work( nwork ), lwork-nwork+1, ierr )
1626 ie = 1
1627 itauq = itau
1628 itaup = itauq + m
1629 nwork = itaup + m
1630*
1631* Bidiagonalize L in WORK(IL)
1632* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work]
1633* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + 2*M*NB [work]
1634* RWorkspace: need M [e]
1635*
1636 CALL cgebrd( m, m, work( il ), ldwrkl, s, rwork( ie ),
1637 $ work( itauq ), work( itaup ), work( nwork ),
1638 $ lwork-nwork+1, ierr )
1639*
1640* Perform bidiagonal SVD, computing left singular vectors
1641* of bidiagonal matrix in RWORK(IRU) and computing right
1642* singular vectors of bidiagonal matrix in RWORK(IRVT)
1643* CWorkspace: need 0
1644* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC
1645*
1646 iru = ie + m
1647 irvt = iru + m*m
1648 nrwork = irvt + m*m
1649 CALL sbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),
1650 $ m, rwork( irvt ), m, dum, idum,
1651 $ rwork( nrwork ), iwork, info )
1652*
1653* Copy real matrix RWORK(IRU) to complex matrix U
1654* Overwrite U by left singular vectors of L
1655* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work]
1656* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work]
1657* RWorkspace: need 0
1658*
1659 CALL clacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1660 CALL cunmbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1661 $ work( itauq ), u, ldu, work( nwork ),
1662 $ lwork-nwork+1, ierr )
1663*
1664* Copy real matrix RWORK(IRVT) to complex matrix VT
1665* Overwrite VT by left singular vectors of L
1666* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work]
1667* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work]
1668* RWorkspace: need 0
1669*
1670 CALL clacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt )
1671 CALL cunmbr( 'P', 'R', 'C', m, m, m, work( il ), ldwrkl,
1672 $ work( itaup ), vt, ldvt, work( nwork ),
1673 $ lwork-nwork+1, ierr )
1674*
1675* Copy VT to WORK(IL), multiply right singular vectors of L
1676* in WORK(IL) by Q in A, storing result in VT
1677* CWorkspace: need M*M [L]
1678* RWorkspace: need 0
1679*
1680 CALL clacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
1681 CALL cgemm( 'N', 'N', m, n, m, cone, work( il ), ldwrkl,
1682 $ a, lda, czero, vt, ldvt )
1683*
1684 ELSE IF( wntqa ) THEN
1685*
1686* Path 4t (N >> M, JOBZ='A')
1687* N right singular vectors to be computed in VT and
1688* M left singular vectors to be computed in U
1689*
1690 ivt = 1
1691*
1692* WORK(IVT) is M by M
1693*
1694 ldwkvt = m
1695 itau = ivt + ldwkvt*m
1696 nwork = itau + m
1697*
1698* Compute A=L*Q, copying result to VT
1699* CWorkspace: need M*M [VT] + M [tau] + M [work]
1700* CWorkspace: prefer M*M [VT] + M [tau] + M*NB [work]
1701* RWorkspace: need 0
1702*
1703 CALL cgelqf( m, n, a, lda, work( itau ), work( nwork ),
1704 $ lwork-nwork+1, ierr )
1705 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
1706*
1707* Generate Q in VT
1708* CWorkspace: need M*M [VT] + M [tau] + N [work]
1709* CWorkspace: prefer M*M [VT] + M [tau] + N*NB [work]
1710* RWorkspace: need 0
1711*
1712 CALL cunglq( n, n, m, vt, ldvt, work( itau ),
1713 $ work( nwork ), lwork-nwork+1, ierr )
1714*
1715* Produce L in A, zeroing out above it
1716*
1717 CALL claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
1718 $ lda )
1719 ie = 1
1720 itauq = itau
1721 itaup = itauq + m
1722 nwork = itaup + m
1723*
1724* Bidiagonalize L in A
1725* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work]
1726* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + 2*M*NB [work]
1727* RWorkspace: need M [e]
1728*
1729 CALL cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
1730 $ work( itaup ), work( nwork ), lwork-nwork+1,
1731 $ ierr )
1732*
1733* Perform bidiagonal SVD, computing left singular vectors
1734* of bidiagonal matrix in RWORK(IRU) and computing right
1735* singular vectors of bidiagonal matrix in RWORK(IRVT)
1736* CWorkspace: need 0
1737* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC
1738*
1739 iru = ie + m
1740 irvt = iru + m*m
1741 nrwork = irvt + m*m
1742 CALL sbdsdc( 'U', 'I', m, s, rwork( ie ), rwork( iru ),
1743 $ m, rwork( irvt ), m, dum, idum,
1744 $ rwork( nrwork ), iwork, info )
1745*
1746* Copy real matrix RWORK(IRU) to complex matrix U
1747* Overwrite U by left singular vectors of L
1748* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work]
1749* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work]
1750* RWorkspace: need 0
1751*
1752 CALL clacp2( 'F', m, m, rwork( iru ), m, u, ldu )
1753 CALL cunmbr( 'Q', 'L', 'N', m, m, m, a, lda,
1754 $ work( itauq ), u, ldu, work( nwork ),
1755 $ lwork-nwork+1, ierr )
1756*
1757* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
1758* Overwrite WORK(IVT) by right singular vectors of L
1759* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work]
1760* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work]
1761* RWorkspace: need 0
1762*
1763 CALL clacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),
1764 $ ldwkvt )
1765 CALL cunmbr( 'P', 'R', 'C', m, m, m, a, lda,
1766 $ work( itaup ), work( ivt ), ldwkvt,
1767 $ work( nwork ), lwork-nwork+1, ierr )
1768*
1769* Multiply right singular vectors of L in WORK(IVT) by
1770* Q in VT, storing result in A
1771* CWorkspace: need M*M [VT]
1772* RWorkspace: need 0
1773*
1774 CALL cgemm( 'N', 'N', m, n, m, cone, work( ivt ), ldwkvt,
1775 $ vt, ldvt, czero, a, lda )
1776*
1777* Copy right singular vectors of A from A to VT
1778*
1779 CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
1780*
1781 END IF
1782*
1783 ELSE IF( n.GE.mnthr2 ) THEN
1784*
1785* MNTHR2 <= N < MNTHR1
1786*
1787* Path 5t (N >> M, but not as much as MNTHR1)
1788* Reduce to bidiagonal form without QR decomposition, use
1789* CUNGBR and matrix multiplication to compute singular vectors
1790*
1791 ie = 1
1792 nrwork = ie + m
1793 itauq = 1
1794 itaup = itauq + m
1795 nwork = itaup + m
1796*
1797* Bidiagonalize A
1798* CWorkspace: need 2*M [tauq, taup] + N [work]
1799* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work]
1800* RWorkspace: need M [e]
1801*
1802 CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
1803 $ work( itaup ), work( nwork ), lwork-nwork+1,
1804 $ ierr )
1805*
1806 IF( wntqn ) THEN
1807*
1808* Path 5tn (N >> M, JOBZ='N')
1809* Compute singular values only
1810* CWorkspace: need 0
1811* RWorkspace: need M [e] + BDSPAC
1812*
1813 CALL sbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,
1814 $ dum, idum, rwork( nrwork ), iwork, info )
1815 ELSE IF( wntqo ) THEN
1816 irvt = nrwork
1817 iru = irvt + m*m
1818 nrwork = iru + m*m
1819 ivt = nwork
1820*
1821* Path 5to (N >> M, JOBZ='O')
1822* Copy A to U, generate Q
1823* CWorkspace: need 2*M [tauq, taup] + M [work]
1824* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1825* RWorkspace: need 0
1826*
1827 CALL clacpy( 'L', m, m, a, lda, u, ldu )
1828 CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1829 $ work( nwork ), lwork-nwork+1, ierr )
1830*
1831* Generate P**H in A
1832* CWorkspace: need 2*M [tauq, taup] + M [work]
1833* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1834* RWorkspace: need 0
1835*
1836 CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
1837 $ work( nwork ), lwork-nwork+1, ierr )
1838*
1839 ldwkvt = m
1840 IF( lwork .GE. m*n + 3*m ) THEN
1841*
1842* WORK( IVT ) is M by N
1843*
1844 nwork = ivt + ldwkvt*n
1845 chunk = n
1846 ELSE
1847*
1848* WORK( IVT ) is M by CHUNK
1849*
1850 chunk = ( lwork - 3*m ) / m
1851 nwork = ivt + ldwkvt*chunk
1852 END IF
1853*
1854* Perform bidiagonal SVD, computing left singular vectors
1855* of bidiagonal matrix in RWORK(IRU) and computing right
1856* singular vectors of bidiagonal matrix in RWORK(IRVT)
1857* CWorkspace: need 0
1858* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
1859*
1860 CALL sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1861 $ m, rwork( irvt ), m, dum, idum,
1862 $ rwork( nrwork ), iwork, info )
1863*
1864* Multiply Q in U by real matrix RWORK(IRVT)
1865* storing the result in WORK(IVT), copying to U
1866* CWorkspace: need 2*M [tauq, taup] + M*M [VT]
1867* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork]
1868*
1869 CALL clacrm( m, m, u, ldu, rwork( iru ), m, work( ivt ),
1870 $ ldwkvt, rwork( nrwork ) )
1871 CALL clacpy( 'F', m, m, work( ivt ), ldwkvt, u, ldu )
1872*
1873* Multiply RWORK(IRVT) by P**H in A, storing the
1874* result in WORK(IVT), copying to A
1875* CWorkspace: need 2*M [tauq, taup] + M*M [VT]
1876* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT]
1877* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork]
1878* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here
1879*
1880 nrwork = iru
1881 DO 50 i = 1, n, chunk
1882 blk = min( n-i+1, chunk )
1883 CALL clarcm( m, blk, rwork( irvt ), m, a( 1, i ), lda,
1884 $ work( ivt ), ldwkvt, rwork( nrwork ) )
1885 CALL clacpy( 'F', m, blk, work( ivt ), ldwkvt,
1886 $ a( 1, i ), lda )
1887 50 CONTINUE
1888 ELSE IF( wntqs ) THEN
1889*
1890* Path 5ts (N >> M, JOBZ='S')
1891* Copy A to U, generate Q
1892* CWorkspace: need 2*M [tauq, taup] + M [work]
1893* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1894* RWorkspace: need 0
1895*
1896 CALL clacpy( 'L', m, m, a, lda, u, ldu )
1897 CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1898 $ work( nwork ), lwork-nwork+1, ierr )
1899*
1900* Copy A to VT, generate P**H
1901* CWorkspace: need 2*M [tauq, taup] + M [work]
1902* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1903* RWorkspace: need 0
1904*
1905 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
1906 CALL cungbr( 'P', m, n, m, vt, ldvt, work( itaup ),
1907 $ work( nwork ), lwork-nwork+1, ierr )
1908*
1909* Perform bidiagonal SVD, computing left singular vectors
1910* of bidiagonal matrix in RWORK(IRU) and computing right
1911* singular vectors of bidiagonal matrix in RWORK(IRVT)
1912* CWorkspace: need 0
1913* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
1914*
1915 irvt = nrwork
1916 iru = irvt + m*m
1917 nrwork = iru + m*m
1918 CALL sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1919 $ m, rwork( irvt ), m, dum, idum,
1920 $ rwork( nrwork ), iwork, info )
1921*
1922* Multiply Q in U by real matrix RWORK(IRU), storing the
1923* result in A, copying to U
1924* CWorkspace: need 0
1925* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork]
1926*
1927 CALL clacrm( m, m, u, ldu, rwork( iru ), m, a, lda,
1928 $ rwork( nrwork ) )
1929 CALL clacpy( 'F', m, m, a, lda, u, ldu )
1930*
1931* Multiply real matrix RWORK(IRVT) by P**H in VT,
1932* storing the result in A, copying to VT
1933* CWorkspace: need 0
1934* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here
1935*
1936 nrwork = iru
1937 CALL clarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,
1938 $ rwork( nrwork ) )
1939 CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
1940 ELSE
1941*
1942* Path 5ta (N >> M, JOBZ='A')
1943* Copy A to U, generate Q
1944* CWorkspace: need 2*M [tauq, taup] + M [work]
1945* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
1946* RWorkspace: need 0
1947*
1948 CALL clacpy( 'L', m, m, a, lda, u, ldu )
1949 CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
1950 $ work( nwork ), lwork-nwork+1, ierr )
1951*
1952* Copy A to VT, generate P**H
1953* CWorkspace: need 2*M [tauq, taup] + N [work]
1954* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work]
1955* RWorkspace: need 0
1956*
1957 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
1958 CALL cungbr( 'P', n, n, m, vt, ldvt, work( itaup ),
1959 $ work( nwork ), lwork-nwork+1, ierr )
1960*
1961* Perform bidiagonal SVD, computing left singular vectors
1962* of bidiagonal matrix in RWORK(IRU) and computing right
1963* singular vectors of bidiagonal matrix in RWORK(IRVT)
1964* CWorkspace: need 0
1965* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
1966*
1967 irvt = nrwork
1968 iru = irvt + m*m
1969 nrwork = iru + m*m
1970 CALL sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
1971 $ m, rwork( irvt ), m, dum, idum,
1972 $ rwork( nrwork ), iwork, info )
1973*
1974* Multiply Q in U by real matrix RWORK(IRU), storing the
1975* result in A, copying to U
1976* CWorkspace: need 0
1977* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork]
1978*
1979 CALL clacrm( m, m, u, ldu, rwork( iru ), m, a, lda,
1980 $ rwork( nrwork ) )
1981 CALL clacpy( 'F', m, m, a, lda, u, ldu )
1982*
1983* Multiply real matrix RWORK(IRVT) by P**H in VT,
1984* storing the result in A, copying to VT
1985* CWorkspace: need 0
1986* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here
1987*
1988 nrwork = iru
1989 CALL clarcm( m, n, rwork( irvt ), m, vt, ldvt, a, lda,
1990 $ rwork( nrwork ) )
1991 CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
1992 END IF
1993*
1994 ELSE
1995*
1996* N .LT. MNTHR2
1997*
1998* Path 6t (N > M, but not much larger)
1999* Reduce to bidiagonal form without LQ decomposition
2000* Use CUNMBR to compute singular vectors
2001*
2002 ie = 1
2003 nrwork = ie + m
2004 itauq = 1
2005 itaup = itauq + m
2006 nwork = itaup + m
2007*
2008* Bidiagonalize A
2009* CWorkspace: need 2*M [tauq, taup] + N [work]
2010* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work]
2011* RWorkspace: need M [e]
2012*
2013 CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2014 $ work( itaup ), work( nwork ), lwork-nwork+1,
2015 $ ierr )
2016 IF( wntqn ) THEN
2017*
2018* Path 6tn (N > M, JOBZ='N')
2019* Compute singular values only
2020* CWorkspace: need 0
2021* RWorkspace: need M [e] + BDSPAC
2022*
2023 CALL sbdsdc( 'L', 'N', m, s, rwork( ie ), dum,1,dum,1,
2024 $ dum, idum, rwork( nrwork ), iwork, info )
2025 ELSE IF( wntqo ) THEN
2026* Path 6to (N > M, JOBZ='O')
2027 ldwkvt = m
2028 ivt = nwork
2029 IF( lwork .GE. m*n + 3*m ) THEN
2030*
2031* WORK( IVT ) is M by N
2032*
2033 CALL claset( 'F', m, n, czero, czero, work( ivt ),
2034 $ ldwkvt )
2035 nwork = ivt + ldwkvt*n
2036 ELSE
2037*
2038* WORK( IVT ) is M by CHUNK
2039*
2040 chunk = ( lwork - 3*m ) / m
2041 nwork = ivt + ldwkvt*chunk
2042 END IF
2043*
2044* Perform bidiagonal SVD, computing left singular vectors
2045* of bidiagonal matrix in RWORK(IRU) and computing right
2046* singular vectors of bidiagonal matrix in RWORK(IRVT)
2047* CWorkspace: need 0
2048* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
2049*
2050 irvt = nrwork
2051 iru = irvt + m*m
2052 nrwork = iru + m*m
2053 CALL sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
2054 $ m, rwork( irvt ), m, dum, idum,
2055 $ rwork( nrwork ), iwork, info )
2056*
2057* Copy real matrix RWORK(IRU) to complex matrix U
2058* Overwrite U by left singular vectors of A
2059* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work]
2060* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work]
2061* RWorkspace: need M [e] + M*M [RVT] + M*M [RU]
2062*
2063 CALL clacp2( 'F', m, m, rwork( iru ), m, u, ldu )
2064 CALL cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
2065 $ work( itauq ), u, ldu, work( nwork ),
2066 $ lwork-nwork+1, ierr )
2067*
2068 IF( lwork .GE. m*n + 3*m ) THEN
2069*
2070* Path 6to-fast
2071* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
2072* Overwrite WORK(IVT) by right singular vectors of A,
2073* copying to A
2074* CWorkspace: need 2*M [tauq, taup] + M*N [VT] + M [work]
2075* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] + M*NB [work]
2076* RWorkspace: need M [e] + M*M [RVT]
2077*
2078 CALL clacp2( 'F', m, m, rwork( irvt ), m, work( ivt ),
2079 $ ldwkvt )
2080 CALL cunmbr( 'P', 'R', 'C', m, n, m, a, lda,
2081 $ work( itaup ), work( ivt ), ldwkvt,
2082 $ work( nwork ), lwork-nwork+1, ierr )
2083 CALL clacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )
2084 ELSE
2085*
2086* Path 6to-slow
2087* Generate P**H in A
2088* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work]
2089* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work]
2090* RWorkspace: need 0
2091*
2092 CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
2093 $ work( nwork ), lwork-nwork+1, ierr )
2094*
2095* Multiply Q in A by real matrix RWORK(IRU), storing the
2096* result in WORK(IU), copying to A
2097* CWorkspace: need 2*M [tauq, taup] + M*M [VT]
2098* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT]
2099* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork]
2100* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here
2101*
2102 nrwork = iru
2103 DO 60 i = 1, n, chunk
2104 blk = min( n-i+1, chunk )
2105 CALL clarcm( m, blk, rwork( irvt ), m, a( 1, i ),
2106 $ lda, work( ivt ), ldwkvt,
2107 $ rwork( nrwork ) )
2108 CALL clacpy( 'F', m, blk, work( ivt ), ldwkvt,
2109 $ a( 1, i ), lda )
2110 60 CONTINUE
2111 END IF
2112 ELSE IF( wntqs ) THEN
2113*
2114* Path 6ts (N > M, JOBZ='S')
2115* Perform bidiagonal SVD, computing left singular vectors
2116* of bidiagonal matrix in RWORK(IRU) and computing right
2117* singular vectors of bidiagonal matrix in RWORK(IRVT)
2118* CWorkspace: need 0
2119* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
2120*
2121 irvt = nrwork
2122 iru = irvt + m*m
2123 nrwork = iru + m*m
2124 CALL sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
2125 $ m, rwork( irvt ), m, dum, idum,
2126 $ rwork( nrwork ), iwork, info )
2127*
2128* Copy real matrix RWORK(IRU) to complex matrix U
2129* Overwrite U by left singular vectors of A
2130* CWorkspace: need 2*M [tauq, taup] + M [work]
2131* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
2132* RWorkspace: need M [e] + M*M [RVT] + M*M [RU]
2133*
2134 CALL clacp2( 'F', m, m, rwork( iru ), m, u, ldu )
2135 CALL cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
2136 $ work( itauq ), u, ldu, work( nwork ),
2137 $ lwork-nwork+1, ierr )
2138*
2139* Copy real matrix RWORK(IRVT) to complex matrix VT
2140* Overwrite VT by right singular vectors of A
2141* CWorkspace: need 2*M [tauq, taup] + M [work]
2142* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
2143* RWorkspace: need M [e] + M*M [RVT]
2144*
2145 CALL claset( 'F', m, n, czero, czero, vt, ldvt )
2146 CALL clacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt )
2147 CALL cunmbr( 'P', 'R', 'C', m, n, m, a, lda,
2148 $ work( itaup ), vt, ldvt, work( nwork ),
2149 $ lwork-nwork+1, ierr )
2150 ELSE
2151*
2152* Path 6ta (N > M, JOBZ='A')
2153* Perform bidiagonal SVD, computing left singular vectors
2154* of bidiagonal matrix in RWORK(IRU) and computing right
2155* singular vectors of bidiagonal matrix in RWORK(IRVT)
2156* CWorkspace: need 0
2157* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC
2158*
2159 irvt = nrwork
2160 iru = irvt + m*m
2161 nrwork = iru + m*m
2162*
2163 CALL sbdsdc( 'L', 'I', m, s, rwork( ie ), rwork( iru ),
2164 $ m, rwork( irvt ), m, dum, idum,
2165 $ rwork( nrwork ), iwork, info )
2166*
2167* Copy real matrix RWORK(IRU) to complex matrix U
2168* Overwrite U by left singular vectors of A
2169* CWorkspace: need 2*M [tauq, taup] + M [work]
2170* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
2171* RWorkspace: need M [e] + M*M [RVT] + M*M [RU]
2172*
2173 CALL clacp2( 'F', m, m, rwork( iru ), m, u, ldu )
2174 CALL cunmbr( 'Q', 'L', 'N', m, m, n, a, lda,
2175 $ work( itauq ), u, ldu, work( nwork ),
2176 $ lwork-nwork+1, ierr )
2177*
2178* Set all of VT to identity matrix
2179*
2180 CALL claset( 'F', n, n, czero, cone, vt, ldvt )
2181*
2182* Copy real matrix RWORK(IRVT) to complex matrix VT
2183* Overwrite VT by right singular vectors of A
2184* CWorkspace: need 2*M [tauq, taup] + N [work]
2185* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work]
2186* RWorkspace: need M [e] + M*M [RVT]
2187*
2188 CALL clacp2( 'F', m, m, rwork( irvt ), m, vt, ldvt )
2189 CALL cunmbr( 'P', 'R', 'C', n, n, m, a, lda,
2190 $ work( itaup ), vt, ldvt, work( nwork ),
2191 $ lwork-nwork+1, ierr )
2192 END IF
2193*
2194 END IF
2195*
2196 END IF
2197*
2198* Undo scaling if necessary
2199*
2200 IF( iscl.EQ.1 ) THEN
2201 IF( anrm.GT.bignum )
2202 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
2203 $ ierr )
2204 IF( info.NE.0 .AND. anrm.GT.bignum )
2205 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
2206 $ rwork( ie ), minmn, ierr )
2207 IF( anrm.LT.smlnum )
2208 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
2209 $ ierr )
2210 IF( info.NE.0 .AND. anrm.LT.smlnum )
2211 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
2212 $ rwork( ie ), minmn, ierr )
2213 END IF
2214*
2215* Return optimal workspace in WORK(1)
2216*
2217 work( 1 ) = sroundup_lwork( maxwrk )
2218*
2219 RETURN
2220*
2221* End of CGESDD
2222*
logical function sisnan(sin)
SISNAN tests input for NaN.
Definition sisnan.f:59
subroutine sbdsdc(uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq, work, iwork, info)
SBDSDC
Definition sbdsdc.f:205
subroutine cungbr(vect, m, n, k, a, lda, tau, work, lwork, info)
CUNGBR
Definition cungbr.f:157
real function clange(norm, m, n, a, lda, work)
CLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition clange.f:115
subroutine cgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
CGEBRD
Definition cgebrd.f:206
subroutine clacrm(m, n, a, lda, b, ldb, c, ldc, rwork)
CLACRM multiplies a complex matrix by a square real matrix.
Definition clacrm.f:114
subroutine clacp2(uplo, m, n, a, lda, b, ldb)
CLACP2 copies all or part of a real two-dimensional array to a complex array.
Definition clacp2.f:104
subroutine clarcm(m, n, a, lda, b, ldb, c, ldc, rwork)
CLARCM copies all or part of a real two-dimensional array to a complex array.
Definition clarcm.f:114
subroutine cunglq(m, n, k, a, lda, tau, work, lwork, info)
CUNGLQ
Definition cunglq.f:127
subroutine cunmbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
CUNMBR
Definition cunmbr.f:197
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:187
real function sroundup_lwork(lwork)
SROUNDUP_LWORK

◆ cgesvd()

subroutine cgesvd ( character jobu,
character jobvt,
integer m,
integer n,
complex, dimension( lda, * ) a,
integer lda,
real, dimension( * ) s,
complex, dimension( ldu, * ) u,
integer ldu,
complex, dimension( ldvt, * ) vt,
integer ldvt,
complex, dimension( * ) work,
integer lwork,
real, dimension( * ) rwork,
integer info )

CGESVD computes the singular value decomposition (SVD) for GE matrices

Download CGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> CGESVD computes the singular value decomposition (SVD) of a complex
!> M-by-N matrix A, optionally computing the left and/or right singular
!> vectors. The SVD is written
!>
!>      A = U * SIGMA * conjugate-transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
!> V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns V**H, not V.
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'A':  all M columns of U are returned in array U:
!>          = 'S':  the first min(m,n) columns of U (the left singular
!>                  vectors) are returned in the array U;
!>          = 'O':  the first min(m,n) columns of U (the left singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no columns of U (no left singular vectors) are
!>                  computed.
!> 
[in]JOBVT
!>          JOBVT is CHARACTER*1
!>          Specifies options for computing all or part of the matrix
!>          V**H:
!>          = 'A':  all N rows of V**H are returned in the array VT;
!>          = 'S':  the first min(m,n) rows of V**H (the right singular
!>                  vectors) are returned in the array VT;
!>          = 'O':  the first min(m,n) rows of V**H (the right singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no rows of V**H (no right singular vectors) are
!>                  computed.
!>
!>          JOBVT and JOBU cannot both be 'O'.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is COMPLEX array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit,
!>          if JOBU = 'O',  A is overwritten with the first min(m,n)
!>                          columns of U (the left singular vectors,
!>                          stored columnwise);
!>          if JOBVT = 'O', A is overwritten with the first min(m,n)
!>                          rows of V**H (the right singular vectors,
!>                          stored rowwise);
!>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
!>                          are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is REAL array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is COMPLEX array, dimension (LDU,UCOL)
!>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
!>          If JOBU = 'A', U contains the M-by-M unitary matrix U;
!>          if JOBU = 'S', U contains the first min(m,n) columns of U
!>          (the left singular vectors, stored columnwise);
!>          if JOBU = 'N' or 'O', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBU = 'S' or 'A', LDU >= M.
!> 
[out]VT
!>          VT is COMPLEX array, dimension (LDVT,N)
!>          If JOBVT = 'A', VT contains the N-by-N unitary matrix
!>          V**H;
!>          if JOBVT = 'S', VT contains the first min(m,n) rows of
!>          V**H (the right singular vectors, stored rowwise);
!>          if JOBVT = 'N' or 'O', VT is not referenced.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1; if
!>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
!> 
[out]WORK
!>          WORK is COMPLEX array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          LWORK >=  MAX(1,2*MIN(M,N)+MAX(M,N)).
!>          For good performance, LWORK should generally be larger.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is REAL array, dimension (5*min(M,N))
!>          On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
!>          unconverged superdiagonal elements of an upper bidiagonal
!>          matrix B whose diagonal is in S (not necessarily sorted).
!>          B satisfies A = U * B * VT, so it has the same singular
!>          values as A, and singular vectors related by U and VT.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if CBDSQR did not converge, INFO specifies how many
!>                superdiagonals of an intermediate bidiagonal form B
!>                did not converge to zero. See the description of RWORK
!>                above for details.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 212 of file cgesvd.f.

214*
215* -- LAPACK driver routine --
216* -- LAPACK is a software package provided by Univ. of Tennessee, --
217* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
218*
219* .. Scalar Arguments ..
220 CHARACTER JOBU, JOBVT
221 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
222* ..
223* .. Array Arguments ..
224 REAL RWORK( * ), S( * )
225 COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
226 $ WORK( * )
227* ..
228*
229* =====================================================================
230*
231* .. Parameters ..
232 COMPLEX CZERO, CONE
233 parameter( czero = ( 0.0e0, 0.0e0 ),
234 $ cone = ( 1.0e0, 0.0e0 ) )
235 REAL ZERO, ONE
236 parameter( zero = 0.0e0, one = 1.0e0 )
237* ..
238* .. Local Scalars ..
239 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
240 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
241 INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
242 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
243 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
244 $ NRVT, WRKBL
245 INTEGER LWORK_CGEQRF, LWORK_CUNGQR_N, LWORK_CUNGQR_M,
246 $ LWORK_CGEBRD, LWORK_CUNGBR_P, LWORK_CUNGBR_Q,
247 $ LWORK_CGELQF, LWORK_CUNGLQ_N, LWORK_CUNGLQ_M
248 REAL ANRM, BIGNUM, EPS, SMLNUM
249* ..
250* .. Local Arrays ..
251 REAL DUM( 1 )
252 COMPLEX CDUM( 1 )
253* ..
254* .. External Subroutines ..
255 EXTERNAL cbdsqr, cgebrd, cgelqf, cgemm, cgeqrf, clacpy,
257 $ slascl, xerbla
258* ..
259* .. External Functions ..
260 LOGICAL LSAME
261 INTEGER ILAENV
262 REAL CLANGE, SLAMCH
263 EXTERNAL lsame, ilaenv, clange, slamch
264* ..
265* .. Intrinsic Functions ..
266 INTRINSIC max, min, sqrt
267* ..
268* .. Executable Statements ..
269*
270* Test the input arguments
271*
272 info = 0
273 minmn = min( m, n )
274 wntua = lsame( jobu, 'A' )
275 wntus = lsame( jobu, 'S' )
276 wntuas = wntua .OR. wntus
277 wntuo = lsame( jobu, 'O' )
278 wntun = lsame( jobu, 'N' )
279 wntva = lsame( jobvt, 'A' )
280 wntvs = lsame( jobvt, 'S' )
281 wntvas = wntva .OR. wntvs
282 wntvo = lsame( jobvt, 'O' )
283 wntvn = lsame( jobvt, 'N' )
284 lquery = ( lwork.EQ.-1 )
285*
286 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
287 info = -1
288 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
289 $ ( wntvo .AND. wntuo ) ) THEN
290 info = -2
291 ELSE IF( m.LT.0 ) THEN
292 info = -3
293 ELSE IF( n.LT.0 ) THEN
294 info = -4
295 ELSE IF( lda.LT.max( 1, m ) ) THEN
296 info = -6
297 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
298 info = -9
299 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
300 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
301 info = -11
302 END IF
303*
304* Compute workspace
305* (Note: Comments in the code beginning "Workspace:" describe the
306* minimal amount of workspace needed at that point in the code,
307* as well as the preferred amount for good performance.
308* CWorkspace refers to complex workspace, and RWorkspace to
309* real workspace. NB refers to the optimal block size for the
310* immediately following subroutine, as returned by ILAENV.)
311*
312 IF( info.EQ.0 ) THEN
313 minwrk = 1
314 maxwrk = 1
315 IF( m.GE.n .AND. minmn.GT.0 ) THEN
316*
317* Space needed for ZBDSQR is BDSPAC = 5*N
318*
319 mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
320* Compute space needed for CGEQRF
321 CALL cgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
322 lwork_cgeqrf = int( cdum(1) )
323* Compute space needed for CUNGQR
324 CALL cungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr )
325 lwork_cungqr_n = int( cdum(1) )
326 CALL cungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr )
327 lwork_cungqr_m = int( cdum(1) )
328* Compute space needed for CGEBRD
329 CALL cgebrd( n, n, a, lda, s, dum(1), cdum(1),
330 $ cdum(1), cdum(1), -1, ierr )
331 lwork_cgebrd = int( cdum(1) )
332* Compute space needed for CUNGBR
333 CALL cungbr( 'P', n, n, n, a, lda, cdum(1),
334 $ cdum(1), -1, ierr )
335 lwork_cungbr_p = int( cdum(1) )
336 CALL cungbr( 'Q', n, n, n, a, lda, cdum(1),
337 $ cdum(1), -1, ierr )
338 lwork_cungbr_q = int( cdum(1) )
339*
340 mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
341 IF( m.GE.mnthr ) THEN
342 IF( wntun ) THEN
343*
344* Path 1 (M much larger than N, JOBU='N')
345*
346 maxwrk = n + lwork_cgeqrf
347 maxwrk = max( maxwrk, 2*n+lwork_cgebrd )
348 IF( wntvo .OR. wntvas )
349 $ maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
350 minwrk = 3*n
351 ELSE IF( wntuo .AND. wntvn ) THEN
352*
353* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
354*
355 wrkbl = n + lwork_cgeqrf
356 wrkbl = max( wrkbl, n+lwork_cungqr_n )
357 wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
358 wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
359 maxwrk = max( n*n+wrkbl, n*n+m*n )
360 minwrk = 2*n + m
361 ELSE IF( wntuo .AND. wntvas ) THEN
362*
363* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
364* 'A')
365*
366 wrkbl = n + lwork_cgeqrf
367 wrkbl = max( wrkbl, n+lwork_cungqr_n )
368 wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
369 wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
370 wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
371 maxwrk = max( n*n+wrkbl, n*n+m*n )
372 minwrk = 2*n + m
373 ELSE IF( wntus .AND. wntvn ) THEN
374*
375* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
376*
377 wrkbl = n + lwork_cgeqrf
378 wrkbl = max( wrkbl, n+lwork_cungqr_n )
379 wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
380 wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
381 maxwrk = n*n + wrkbl
382 minwrk = 2*n + m
383 ELSE IF( wntus .AND. wntvo ) THEN
384*
385* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
386*
387 wrkbl = n + lwork_cgeqrf
388 wrkbl = max( wrkbl, n+lwork_cungqr_n )
389 wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
390 wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
391 wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
392 maxwrk = 2*n*n + wrkbl
393 minwrk = 2*n + m
394 ELSE IF( wntus .AND. wntvas ) THEN
395*
396* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397* 'A')
398*
399 wrkbl = n + lwork_cgeqrf
400 wrkbl = max( wrkbl, n+lwork_cungqr_n )
401 wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
402 wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
403 wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
404 maxwrk = n*n + wrkbl
405 minwrk = 2*n + m
406 ELSE IF( wntua .AND. wntvn ) THEN
407*
408* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
409*
410 wrkbl = n + lwork_cgeqrf
411 wrkbl = max( wrkbl, n+lwork_cungqr_m )
412 wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
413 wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
414 maxwrk = n*n + wrkbl
415 minwrk = 2*n + m
416 ELSE IF( wntua .AND. wntvo ) THEN
417*
418* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
419*
420 wrkbl = n + lwork_cgeqrf
421 wrkbl = max( wrkbl, n+lwork_cungqr_m )
422 wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
423 wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
424 wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
425 maxwrk = 2*n*n + wrkbl
426 minwrk = 2*n + m
427 ELSE IF( wntua .AND. wntvas ) THEN
428*
429* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430* 'A')
431*
432 wrkbl = n + lwork_cgeqrf
433 wrkbl = max( wrkbl, n+lwork_cungqr_m )
434 wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
435 wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
436 wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
437 maxwrk = n*n + wrkbl
438 minwrk = 2*n + m
439 END IF
440 ELSE
441*
442* Path 10 (M at least N, but not much larger)
443*
444 CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
445 $ cdum(1), cdum(1), -1, ierr )
446 lwork_cgebrd = int( cdum(1) )
447 maxwrk = 2*n + lwork_cgebrd
448 IF( wntus .OR. wntuo ) THEN
449 CALL cungbr( 'Q', m, n, n, a, lda, cdum(1),
450 $ cdum(1), -1, ierr )
451 lwork_cungbr_q = int( cdum(1) )
452 maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
453 END IF
454 IF( wntua ) THEN
455 CALL cungbr( 'Q', m, m, n, a, lda, cdum(1),
456 $ cdum(1), -1, ierr )
457 lwork_cungbr_q = int( cdum(1) )
458 maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
459 END IF
460 IF( .NOT.wntvn ) THEN
461 maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
462 END IF
463 minwrk = 2*n + m
464 END IF
465 ELSE IF( minmn.GT.0 ) THEN
466*
467* Space needed for CBDSQR is BDSPAC = 5*M
468*
469 mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
470* Compute space needed for CGELQF
471 CALL cgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
472 lwork_cgelqf = int( cdum(1) )
473* Compute space needed for CUNGLQ
474 CALL cunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
475 $ ierr )
476 lwork_cunglq_n = int( cdum(1) )
477 CALL cunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr )
478 lwork_cunglq_m = int( cdum(1) )
479* Compute space needed for CGEBRD
480 CALL cgebrd( m, m, a, lda, s, dum(1), cdum(1),
481 $ cdum(1), cdum(1), -1, ierr )
482 lwork_cgebrd = int( cdum(1) )
483* Compute space needed for CUNGBR P
484 CALL cungbr( 'P', m, m, m, a, n, cdum(1),
485 $ cdum(1), -1, ierr )
486 lwork_cungbr_p = int( cdum(1) )
487* Compute space needed for CUNGBR Q
488 CALL cungbr( 'Q', m, m, m, a, n, cdum(1),
489 $ cdum(1), -1, ierr )
490 lwork_cungbr_q = int( cdum(1) )
491 IF( n.GE.mnthr ) THEN
492 IF( wntvn ) THEN
493*
494* Path 1t(N much larger than M, JOBVT='N')
495*
496 maxwrk = m + lwork_cgelqf
497 maxwrk = max( maxwrk, 2*m+lwork_cgebrd )
498 IF( wntuo .OR. wntuas )
499 $ maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
500 minwrk = 3*m
501 ELSE IF( wntvo .AND. wntun ) THEN
502*
503* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
504*
505 wrkbl = m + lwork_cgelqf
506 wrkbl = max( wrkbl, m+lwork_cunglq_m )
507 wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
508 wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
509 maxwrk = max( m*m+wrkbl, m*m+m*n )
510 minwrk = 2*m + n
511 ELSE IF( wntvo .AND. wntuas ) THEN
512*
513* Path 3t(N much larger than M, JOBU='S' or 'A',
514* JOBVT='O')
515*
516 wrkbl = m + lwork_cgelqf
517 wrkbl = max( wrkbl, m+lwork_cunglq_m )
518 wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
519 wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
520 wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
521 maxwrk = max( m*m+wrkbl, m*m+m*n )
522 minwrk = 2*m + n
523 ELSE IF( wntvs .AND. wntun ) THEN
524*
525* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
526*
527 wrkbl = m + lwork_cgelqf
528 wrkbl = max( wrkbl, m+lwork_cunglq_m )
529 wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
530 wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
531 maxwrk = m*m + wrkbl
532 minwrk = 2*m + n
533 ELSE IF( wntvs .AND. wntuo ) THEN
534*
535* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
536*
537 wrkbl = m + lwork_cgelqf
538 wrkbl = max( wrkbl, m+lwork_cunglq_m )
539 wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
540 wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
541 wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
542 maxwrk = 2*m*m + wrkbl
543 minwrk = 2*m + n
544 ELSE IF( wntvs .AND. wntuas ) THEN
545*
546* Path 6t(N much larger than M, JOBU='S' or 'A',
547* JOBVT='S')
548*
549 wrkbl = m + lwork_cgelqf
550 wrkbl = max( wrkbl, m+lwork_cunglq_m )
551 wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
552 wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
553 wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
554 maxwrk = m*m + wrkbl
555 minwrk = 2*m + n
556 ELSE IF( wntva .AND. wntun ) THEN
557*
558* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
559*
560 wrkbl = m + lwork_cgelqf
561 wrkbl = max( wrkbl, m+lwork_cunglq_n )
562 wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
563 wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
564 maxwrk = m*m + wrkbl
565 minwrk = 2*m + n
566 ELSE IF( wntva .AND. wntuo ) THEN
567*
568* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
569*
570 wrkbl = m + lwork_cgelqf
571 wrkbl = max( wrkbl, m+lwork_cunglq_n )
572 wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
573 wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
574 wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
575 maxwrk = 2*m*m + wrkbl
576 minwrk = 2*m + n
577 ELSE IF( wntva .AND. wntuas ) THEN
578*
579* Path 9t(N much larger than M, JOBU='S' or 'A',
580* JOBVT='A')
581*
582 wrkbl = m + lwork_cgelqf
583 wrkbl = max( wrkbl, m+lwork_cunglq_n )
584 wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
585 wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
586 wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
587 maxwrk = m*m + wrkbl
588 minwrk = 2*m + n
589 END IF
590 ELSE
591*
592* Path 10t(N greater than M, but not much larger)
593*
594 CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
595 $ cdum(1), cdum(1), -1, ierr )
596 lwork_cgebrd = int( cdum(1) )
597 maxwrk = 2*m + lwork_cgebrd
598 IF( wntvs .OR. wntvo ) THEN
599* Compute space needed for CUNGBR P
600 CALL cungbr( 'P', m, n, m, a, n, cdum(1),
601 $ cdum(1), -1, ierr )
602 lwork_cungbr_p = int( cdum(1) )
603 maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
604 END IF
605 IF( wntva ) THEN
606 CALL cungbr( 'P', n, n, m, a, n, cdum(1),
607 $ cdum(1), -1, ierr )
608 lwork_cungbr_p = int( cdum(1) )
609 maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
610 END IF
611 IF( .NOT.wntun ) THEN
612 maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
613 END IF
614 minwrk = 2*m + n
615 END IF
616 END IF
617 maxwrk = max( minwrk, maxwrk )
618 work( 1 ) = maxwrk
619*
620 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
621 info = -13
622 END IF
623 END IF
624*
625 IF( info.NE.0 ) THEN
626 CALL xerbla( 'CGESVD', -info )
627 RETURN
628 ELSE IF( lquery ) THEN
629 RETURN
630 END IF
631*
632* Quick return if possible
633*
634 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
635 RETURN
636 END IF
637*
638* Get machine constants
639*
640 eps = slamch( 'P' )
641 smlnum = sqrt( slamch( 'S' ) ) / eps
642 bignum = one / smlnum
643*
644* Scale A if max element outside range [SMLNUM,BIGNUM]
645*
646 anrm = clange( 'M', m, n, a, lda, dum )
647 iscl = 0
648 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
649 iscl = 1
650 CALL clascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
651 ELSE IF( anrm.GT.bignum ) THEN
652 iscl = 1
653 CALL clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
654 END IF
655*
656 IF( m.GE.n ) THEN
657*
658* A has at least as many rows as columns. If A has sufficiently
659* more rows than columns, first reduce using the QR
660* decomposition (if sufficient workspace available)
661*
662 IF( m.GE.mnthr ) THEN
663*
664 IF( wntun ) THEN
665*
666* Path 1 (M much larger than N, JOBU='N')
667* No left singular vectors to be computed
668*
669 itau = 1
670 iwork = itau + n
671*
672* Compute A=Q*R
673* (CWorkspace: need 2*N, prefer N+N*NB)
674* (RWorkspace: need 0)
675*
676 CALL cgeqrf( m, n, a, lda, work( itau ), work( iwork ),
677 $ lwork-iwork+1, ierr )
678*
679* Zero out below R
680*
681 IF( n .GT. 1 ) THEN
682 CALL claset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
683 $ lda )
684 END IF
685 ie = 1
686 itauq = 1
687 itaup = itauq + n
688 iwork = itaup + n
689*
690* Bidiagonalize R in A
691* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
692* (RWorkspace: need N)
693*
694 CALL cgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
695 $ work( itaup ), work( iwork ), lwork-iwork+1,
696 $ ierr )
697 ncvt = 0
698 IF( wntvo .OR. wntvas ) THEN
699*
700* If right singular vectors desired, generate P'.
701* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
702* (RWorkspace: 0)
703*
704 CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
705 $ work( iwork ), lwork-iwork+1, ierr )
706 ncvt = n
707 END IF
708 irwork = ie + n
709*
710* Perform bidiagonal QR iteration, computing right
711* singular vectors of A in A if desired
712* (CWorkspace: 0)
713* (RWorkspace: need BDSPAC)
714*
715 CALL cbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
716 $ cdum, 1, cdum, 1, rwork( irwork ), info )
717*
718* If right singular vectors desired in VT, copy them there
719*
720 IF( wntvas )
721 $ CALL clacpy( 'F', n, n, a, lda, vt, ldvt )
722*
723 ELSE IF( wntuo .AND. wntvn ) THEN
724*
725* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
726* N left singular vectors to be overwritten on A and
727* no right singular vectors to be computed
728*
729 IF( lwork.GE.n*n+3*n ) THEN
730*
731* Sufficient workspace for a fast algorithm
732*
733 ir = 1
734 IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
735*
736* WORK(IU) is LDA by N, WORK(IR) is LDA by N
737*
738 ldwrku = lda
739 ldwrkr = lda
740 ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
741*
742* WORK(IU) is LDA by N, WORK(IR) is N by N
743*
744 ldwrku = lda
745 ldwrkr = n
746 ELSE
747*
748* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
749*
750 ldwrku = ( lwork-n*n ) / n
751 ldwrkr = n
752 END IF
753 itau = ir + ldwrkr*n
754 iwork = itau + n
755*
756* Compute A=Q*R
757* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
758* (RWorkspace: 0)
759*
760 CALL cgeqrf( m, n, a, lda, work( itau ),
761 $ work( iwork ), lwork-iwork+1, ierr )
762*
763* Copy R to WORK(IR) and zero out below it
764*
765 CALL clacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
766 CALL claset( 'L', n-1, n-1, czero, czero,
767 $ work( ir+1 ), ldwrkr )
768*
769* Generate Q in A
770* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
771* (RWorkspace: 0)
772*
773 CALL cungqr( m, n, n, a, lda, work( itau ),
774 $ work( iwork ), lwork-iwork+1, ierr )
775 ie = 1
776 itauq = itau
777 itaup = itauq + n
778 iwork = itaup + n
779*
780* Bidiagonalize R in WORK(IR)
781* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
782* (RWorkspace: need N)
783*
784 CALL cgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
785 $ work( itauq ), work( itaup ),
786 $ work( iwork ), lwork-iwork+1, ierr )
787*
788* Generate left vectors bidiagonalizing R
789* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
790* (RWorkspace: need 0)
791*
792 CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
793 $ work( itauq ), work( iwork ),
794 $ lwork-iwork+1, ierr )
795 irwork = ie + n
796*
797* Perform bidiagonal QR iteration, computing left
798* singular vectors of R in WORK(IR)
799* (CWorkspace: need N*N)
800* (RWorkspace: need BDSPAC)
801*
802 CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
803 $ work( ir ), ldwrkr, cdum, 1,
804 $ rwork( irwork ), info )
805 iu = itauq
806*
807* Multiply Q in A by left singular vectors of R in
808* WORK(IR), storing result in WORK(IU) and copying to A
809* (CWorkspace: need N*N+N, prefer N*N+M*N)
810* (RWorkspace: 0)
811*
812 DO 10 i = 1, m, ldwrku
813 chunk = min( m-i+1, ldwrku )
814 CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
815 $ lda, work( ir ), ldwrkr, czero,
816 $ work( iu ), ldwrku )
817 CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
818 $ a( i, 1 ), lda )
819 10 CONTINUE
820*
821 ELSE
822*
823* Insufficient workspace for a fast algorithm
824*
825 ie = 1
826 itauq = 1
827 itaup = itauq + n
828 iwork = itaup + n
829*
830* Bidiagonalize A
831* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
832* (RWorkspace: N)
833*
834 CALL cgebrd( m, n, a, lda, s, rwork( ie ),
835 $ work( itauq ), work( itaup ),
836 $ work( iwork ), lwork-iwork+1, ierr )
837*
838* Generate left vectors bidiagonalizing A
839* (CWorkspace: need 3*N, prefer 2*N+N*NB)
840* (RWorkspace: 0)
841*
842 CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
843 $ work( iwork ), lwork-iwork+1, ierr )
844 irwork = ie + n
845*
846* Perform bidiagonal QR iteration, computing left
847* singular vectors of A in A
848* (CWorkspace: need 0)
849* (RWorkspace: need BDSPAC)
850*
851 CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
852 $ a, lda, cdum, 1, rwork( irwork ), info )
853*
854 END IF
855*
856 ELSE IF( wntuo .AND. wntvas ) THEN
857*
858* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859* N left singular vectors to be overwritten on A and
860* N right singular vectors to be computed in VT
861*
862 IF( lwork.GE.n*n+3*n ) THEN
863*
864* Sufficient workspace for a fast algorithm
865*
866 ir = 1
867 IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
868*
869* WORK(IU) is LDA by N and WORK(IR) is LDA by N
870*
871 ldwrku = lda
872 ldwrkr = lda
873 ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
874*
875* WORK(IU) is LDA by N and WORK(IR) is N by N
876*
877 ldwrku = lda
878 ldwrkr = n
879 ELSE
880*
881* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882*
883 ldwrku = ( lwork-n*n ) / n
884 ldwrkr = n
885 END IF
886 itau = ir + ldwrkr*n
887 iwork = itau + n
888*
889* Compute A=Q*R
890* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
891* (RWorkspace: 0)
892*
893 CALL cgeqrf( m, n, a, lda, work( itau ),
894 $ work( iwork ), lwork-iwork+1, ierr )
895*
896* Copy R to VT, zeroing out below it
897*
898 CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
899 IF( n.GT.1 )
900 $ CALL claset( 'L', n-1, n-1, czero, czero,
901 $ vt( 2, 1 ), ldvt )
902*
903* Generate Q in A
904* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
905* (RWorkspace: 0)
906*
907 CALL cungqr( m, n, n, a, lda, work( itau ),
908 $ work( iwork ), lwork-iwork+1, ierr )
909 ie = 1
910 itauq = itau
911 itaup = itauq + n
912 iwork = itaup + n
913*
914* Bidiagonalize R in VT, copying result to WORK(IR)
915* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
916* (RWorkspace: need N)
917*
918 CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
919 $ work( itauq ), work( itaup ),
920 $ work( iwork ), lwork-iwork+1, ierr )
921 CALL clacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
922*
923* Generate left vectors bidiagonalizing R in WORK(IR)
924* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
925* (RWorkspace: 0)
926*
927 CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
928 $ work( itauq ), work( iwork ),
929 $ lwork-iwork+1, ierr )
930*
931* Generate right vectors bidiagonalizing R in VT
932* (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
933* (RWorkspace: 0)
934*
935 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
936 $ work( iwork ), lwork-iwork+1, ierr )
937 irwork = ie + n
938*
939* Perform bidiagonal QR iteration, computing left
940* singular vectors of R in WORK(IR) and computing right
941* singular vectors of R in VT
942* (CWorkspace: need N*N)
943* (RWorkspace: need BDSPAC)
944*
945 CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
946 $ ldvt, work( ir ), ldwrkr, cdum, 1,
947 $ rwork( irwork ), info )
948 iu = itauq
949*
950* Multiply Q in A by left singular vectors of R in
951* WORK(IR), storing result in WORK(IU) and copying to A
952* (CWorkspace: need N*N+N, prefer N*N+M*N)
953* (RWorkspace: 0)
954*
955 DO 20 i = 1, m, ldwrku
956 chunk = min( m-i+1, ldwrku )
957 CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
958 $ lda, work( ir ), ldwrkr, czero,
959 $ work( iu ), ldwrku )
960 CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
961 $ a( i, 1 ), lda )
962 20 CONTINUE
963*
964 ELSE
965*
966* Insufficient workspace for a fast algorithm
967*
968 itau = 1
969 iwork = itau + n
970*
971* Compute A=Q*R
972* (CWorkspace: need 2*N, prefer N+N*NB)
973* (RWorkspace: 0)
974*
975 CALL cgeqrf( m, n, a, lda, work( itau ),
976 $ work( iwork ), lwork-iwork+1, ierr )
977*
978* Copy R to VT, zeroing out below it
979*
980 CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
981 IF( n.GT.1 )
982 $ CALL claset( 'L', n-1, n-1, czero, czero,
983 $ vt( 2, 1 ), ldvt )
984*
985* Generate Q in A
986* (CWorkspace: need 2*N, prefer N+N*NB)
987* (RWorkspace: 0)
988*
989 CALL cungqr( m, n, n, a, lda, work( itau ),
990 $ work( iwork ), lwork-iwork+1, ierr )
991 ie = 1
992 itauq = itau
993 itaup = itauq + n
994 iwork = itaup + n
995*
996* Bidiagonalize R in VT
997* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
998* (RWorkspace: N)
999*
1000 CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1001 $ work( itauq ), work( itaup ),
1002 $ work( iwork ), lwork-iwork+1, ierr )
1003*
1004* Multiply Q in A by left vectors bidiagonalizing R
1005* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1006* (RWorkspace: 0)
1007*
1008 CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1009 $ work( itauq ), a, lda, work( iwork ),
1010 $ lwork-iwork+1, ierr )
1011*
1012* Generate right vectors bidiagonalizing R in VT
1013* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1014* (RWorkspace: 0)
1015*
1016 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1017 $ work( iwork ), lwork-iwork+1, ierr )
1018 irwork = ie + n
1019*
1020* Perform bidiagonal QR iteration, computing left
1021* singular vectors of A in A and computing right
1022* singular vectors of A in VT
1023* (CWorkspace: 0)
1024* (RWorkspace: need BDSPAC)
1025*
1026 CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1027 $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1028 $ info )
1029*
1030 END IF
1031*
1032 ELSE IF( wntus ) THEN
1033*
1034 IF( wntvn ) THEN
1035*
1036* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1037* N left singular vectors to be computed in U and
1038* no right singular vectors to be computed
1039*
1040 IF( lwork.GE.n*n+3*n ) THEN
1041*
1042* Sufficient workspace for a fast algorithm
1043*
1044 ir = 1
1045 IF( lwork.GE.wrkbl+lda*n ) THEN
1046*
1047* WORK(IR) is LDA by N
1048*
1049 ldwrkr = lda
1050 ELSE
1051*
1052* WORK(IR) is N by N
1053*
1054 ldwrkr = n
1055 END IF
1056 itau = ir + ldwrkr*n
1057 iwork = itau + n
1058*
1059* Compute A=Q*R
1060* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1061* (RWorkspace: 0)
1062*
1063 CALL cgeqrf( m, n, a, lda, work( itau ),
1064 $ work( iwork ), lwork-iwork+1, ierr )
1065*
1066* Copy R to WORK(IR), zeroing out below it
1067*
1068 CALL clacpy( 'U', n, n, a, lda, work( ir ),
1069 $ ldwrkr )
1070 CALL claset( 'L', n-1, n-1, czero, czero,
1071 $ work( ir+1 ), ldwrkr )
1072*
1073* Generate Q in A
1074* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1075* (RWorkspace: 0)
1076*
1077 CALL cungqr( m, n, n, a, lda, work( itau ),
1078 $ work( iwork ), lwork-iwork+1, ierr )
1079 ie = 1
1080 itauq = itau
1081 itaup = itauq + n
1082 iwork = itaup + n
1083*
1084* Bidiagonalize R in WORK(IR)
1085* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1086* (RWorkspace: need N)
1087*
1088 CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1089 $ rwork( ie ), work( itauq ),
1090 $ work( itaup ), work( iwork ),
1091 $ lwork-iwork+1, ierr )
1092*
1093* Generate left vectors bidiagonalizing R in WORK(IR)
1094* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1095* (RWorkspace: 0)
1096*
1097 CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1098 $ work( itauq ), work( iwork ),
1099 $ lwork-iwork+1, ierr )
1100 irwork = ie + n
1101*
1102* Perform bidiagonal QR iteration, computing left
1103* singular vectors of R in WORK(IR)
1104* (CWorkspace: need N*N)
1105* (RWorkspace: need BDSPAC)
1106*
1107 CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1108 $ 1, work( ir ), ldwrkr, cdum, 1,
1109 $ rwork( irwork ), info )
1110*
1111* Multiply Q in A by left singular vectors of R in
1112* WORK(IR), storing result in U
1113* (CWorkspace: need N*N)
1114* (RWorkspace: 0)
1115*
1116 CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1117 $ work( ir ), ldwrkr, czero, u, ldu )
1118*
1119 ELSE
1120*
1121* Insufficient workspace for a fast algorithm
1122*
1123 itau = 1
1124 iwork = itau + n
1125*
1126* Compute A=Q*R, copying result to U
1127* (CWorkspace: need 2*N, prefer N+N*NB)
1128* (RWorkspace: 0)
1129*
1130 CALL cgeqrf( m, n, a, lda, work( itau ),
1131 $ work( iwork ), lwork-iwork+1, ierr )
1132 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1133*
1134* Generate Q in U
1135* (CWorkspace: need 2*N, prefer N+N*NB)
1136* (RWorkspace: 0)
1137*
1138 CALL cungqr( m, n, n, u, ldu, work( itau ),
1139 $ work( iwork ), lwork-iwork+1, ierr )
1140 ie = 1
1141 itauq = itau
1142 itaup = itauq + n
1143 iwork = itaup + n
1144*
1145* Zero out below R in A
1146*
1147 IF( n .GT. 1 ) THEN
1148 CALL claset( 'L', n-1, n-1, czero, czero,
1149 $ a( 2, 1 ), lda )
1150 END IF
1151*
1152* Bidiagonalize R in A
1153* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1154* (RWorkspace: need N)
1155*
1156 CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1157 $ work( itauq ), work( itaup ),
1158 $ work( iwork ), lwork-iwork+1, ierr )
1159*
1160* Multiply Q in U by left vectors bidiagonalizing R
1161* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1162* (RWorkspace: 0)
1163*
1164 CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1165 $ work( itauq ), u, ldu, work( iwork ),
1166 $ lwork-iwork+1, ierr )
1167 irwork = ie + n
1168*
1169* Perform bidiagonal QR iteration, computing left
1170* singular vectors of A in U
1171* (CWorkspace: 0)
1172* (RWorkspace: need BDSPAC)
1173*
1174 CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1175 $ 1, u, ldu, cdum, 1, rwork( irwork ),
1176 $ info )
1177*
1178 END IF
1179*
1180 ELSE IF( wntvo ) THEN
1181*
1182* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1183* N left singular vectors to be computed in U and
1184* N right singular vectors to be overwritten on A
1185*
1186 IF( lwork.GE.2*n*n+3*n ) THEN
1187*
1188* Sufficient workspace for a fast algorithm
1189*
1190 iu = 1
1191 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1192*
1193* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1194*
1195 ldwrku = lda
1196 ir = iu + ldwrku*n
1197 ldwrkr = lda
1198 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1199*
1200* WORK(IU) is LDA by N and WORK(IR) is N by N
1201*
1202 ldwrku = lda
1203 ir = iu + ldwrku*n
1204 ldwrkr = n
1205 ELSE
1206*
1207* WORK(IU) is N by N and WORK(IR) is N by N
1208*
1209 ldwrku = n
1210 ir = iu + ldwrku*n
1211 ldwrkr = n
1212 END IF
1213 itau = ir + ldwrkr*n
1214 iwork = itau + n
1215*
1216* Compute A=Q*R
1217* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1218* (RWorkspace: 0)
1219*
1220 CALL cgeqrf( m, n, a, lda, work( itau ),
1221 $ work( iwork ), lwork-iwork+1, ierr )
1222*
1223* Copy R to WORK(IU), zeroing out below it
1224*
1225 CALL clacpy( 'U', n, n, a, lda, work( iu ),
1226 $ ldwrku )
1227 CALL claset( 'L', n-1, n-1, czero, czero,
1228 $ work( iu+1 ), ldwrku )
1229*
1230* Generate Q in A
1231* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1232* (RWorkspace: 0)
1233*
1234 CALL cungqr( m, n, n, a, lda, work( itau ),
1235 $ work( iwork ), lwork-iwork+1, ierr )
1236 ie = 1
1237 itauq = itau
1238 itaup = itauq + n
1239 iwork = itaup + n
1240*
1241* Bidiagonalize R in WORK(IU), copying result to
1242* WORK(IR)
1243* (CWorkspace: need 2*N*N+3*N,
1244* prefer 2*N*N+2*N+2*N*NB)
1245* (RWorkspace: need N)
1246*
1247 CALL cgebrd( n, n, work( iu ), ldwrku, s,
1248 $ rwork( ie ), work( itauq ),
1249 $ work( itaup ), work( iwork ),
1250 $ lwork-iwork+1, ierr )
1251 CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1252 $ work( ir ), ldwrkr )
1253*
1254* Generate left bidiagonalizing vectors in WORK(IU)
1255* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1256* (RWorkspace: 0)
1257*
1258 CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1259 $ work( itauq ), work( iwork ),
1260 $ lwork-iwork+1, ierr )
1261*
1262* Generate right bidiagonalizing vectors in WORK(IR)
1263* (CWorkspace: need 2*N*N+3*N-1,
1264* prefer 2*N*N+2*N+(N-1)*NB)
1265* (RWorkspace: 0)
1266*
1267 CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1268 $ work( itaup ), work( iwork ),
1269 $ lwork-iwork+1, ierr )
1270 irwork = ie + n
1271*
1272* Perform bidiagonal QR iteration, computing left
1273* singular vectors of R in WORK(IU) and computing
1274* right singular vectors of R in WORK(IR)
1275* (CWorkspace: need 2*N*N)
1276* (RWorkspace: need BDSPAC)
1277*
1278 CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1279 $ work( ir ), ldwrkr, work( iu ),
1280 $ ldwrku, cdum, 1, rwork( irwork ),
1281 $ info )
1282*
1283* Multiply Q in A by left singular vectors of R in
1284* WORK(IU), storing result in U
1285* (CWorkspace: need N*N)
1286* (RWorkspace: 0)
1287*
1288 CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1289 $ work( iu ), ldwrku, czero, u, ldu )
1290*
1291* Copy right singular vectors of R to A
1292* (CWorkspace: need N*N)
1293* (RWorkspace: 0)
1294*
1295 CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1296 $ lda )
1297*
1298 ELSE
1299*
1300* Insufficient workspace for a fast algorithm
1301*
1302 itau = 1
1303 iwork = itau + n
1304*
1305* Compute A=Q*R, copying result to U
1306* (CWorkspace: need 2*N, prefer N+N*NB)
1307* (RWorkspace: 0)
1308*
1309 CALL cgeqrf( m, n, a, lda, work( itau ),
1310 $ work( iwork ), lwork-iwork+1, ierr )
1311 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1312*
1313* Generate Q in U
1314* (CWorkspace: need 2*N, prefer N+N*NB)
1315* (RWorkspace: 0)
1316*
1317 CALL cungqr( m, n, n, u, ldu, work( itau ),
1318 $ work( iwork ), lwork-iwork+1, ierr )
1319 ie = 1
1320 itauq = itau
1321 itaup = itauq + n
1322 iwork = itaup + n
1323*
1324* Zero out below R in A
1325*
1326 IF( n .GT. 1 ) THEN
1327 CALL claset( 'L', n-1, n-1, czero, czero,
1328 $ a( 2, 1 ), lda )
1329 END IF
1330*
1331* Bidiagonalize R in A
1332* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1333* (RWorkspace: need N)
1334*
1335 CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1336 $ work( itauq ), work( itaup ),
1337 $ work( iwork ), lwork-iwork+1, ierr )
1338*
1339* Multiply Q in U by left vectors bidiagonalizing R
1340* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1341* (RWorkspace: 0)
1342*
1343 CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1344 $ work( itauq ), u, ldu, work( iwork ),
1345 $ lwork-iwork+1, ierr )
1346*
1347* Generate right vectors bidiagonalizing R in A
1348* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1349* (RWorkspace: 0)
1350*
1351 CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1352 $ work( iwork ), lwork-iwork+1, ierr )
1353 irwork = ie + n
1354*
1355* Perform bidiagonal QR iteration, computing left
1356* singular vectors of A in U and computing right
1357* singular vectors of A in A
1358* (CWorkspace: 0)
1359* (RWorkspace: need BDSPAC)
1360*
1361 CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1362 $ lda, u, ldu, cdum, 1, rwork( irwork ),
1363 $ info )
1364*
1365 END IF
1366*
1367 ELSE IF( wntvas ) THEN
1368*
1369* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1370* or 'A')
1371* N left singular vectors to be computed in U and
1372* N right singular vectors to be computed in VT
1373*
1374 IF( lwork.GE.n*n+3*n ) THEN
1375*
1376* Sufficient workspace for a fast algorithm
1377*
1378 iu = 1
1379 IF( lwork.GE.wrkbl+lda*n ) THEN
1380*
1381* WORK(IU) is LDA by N
1382*
1383 ldwrku = lda
1384 ELSE
1385*
1386* WORK(IU) is N by N
1387*
1388 ldwrku = n
1389 END IF
1390 itau = iu + ldwrku*n
1391 iwork = itau + n
1392*
1393* Compute A=Q*R
1394* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1395* (RWorkspace: 0)
1396*
1397 CALL cgeqrf( m, n, a, lda, work( itau ),
1398 $ work( iwork ), lwork-iwork+1, ierr )
1399*
1400* Copy R to WORK(IU), zeroing out below it
1401*
1402 CALL clacpy( 'U', n, n, a, lda, work( iu ),
1403 $ ldwrku )
1404 CALL claset( 'L', n-1, n-1, czero, czero,
1405 $ work( iu+1 ), ldwrku )
1406*
1407* Generate Q in A
1408* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1409* (RWorkspace: 0)
1410*
1411 CALL cungqr( m, n, n, a, lda, work( itau ),
1412 $ work( iwork ), lwork-iwork+1, ierr )
1413 ie = 1
1414 itauq = itau
1415 itaup = itauq + n
1416 iwork = itaup + n
1417*
1418* Bidiagonalize R in WORK(IU), copying result to VT
1419* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1420* (RWorkspace: need N)
1421*
1422 CALL cgebrd( n, n, work( iu ), ldwrku, s,
1423 $ rwork( ie ), work( itauq ),
1424 $ work( itaup ), work( iwork ),
1425 $ lwork-iwork+1, ierr )
1426 CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1427 $ ldvt )
1428*
1429* Generate left bidiagonalizing vectors in WORK(IU)
1430* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1431* (RWorkspace: 0)
1432*
1433 CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1434 $ work( itauq ), work( iwork ),
1435 $ lwork-iwork+1, ierr )
1436*
1437* Generate right bidiagonalizing vectors in VT
1438* (CWorkspace: need N*N+3*N-1,
1439* prefer N*N+2*N+(N-1)*NB)
1440* (RWorkspace: 0)
1441*
1442 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1443 $ work( iwork ), lwork-iwork+1, ierr )
1444 irwork = ie + n
1445*
1446* Perform bidiagonal QR iteration, computing left
1447* singular vectors of R in WORK(IU) and computing
1448* right singular vectors of R in VT
1449* (CWorkspace: need N*N)
1450* (RWorkspace: need BDSPAC)
1451*
1452 CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1453 $ ldvt, work( iu ), ldwrku, cdum, 1,
1454 $ rwork( irwork ), info )
1455*
1456* Multiply Q in A by left singular vectors of R in
1457* WORK(IU), storing result in U
1458* (CWorkspace: need N*N)
1459* (RWorkspace: 0)
1460*
1461 CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
1462 $ work( iu ), ldwrku, czero, u, ldu )
1463*
1464 ELSE
1465*
1466* Insufficient workspace for a fast algorithm
1467*
1468 itau = 1
1469 iwork = itau + n
1470*
1471* Compute A=Q*R, copying result to U
1472* (CWorkspace: need 2*N, prefer N+N*NB)
1473* (RWorkspace: 0)
1474*
1475 CALL cgeqrf( m, n, a, lda, work( itau ),
1476 $ work( iwork ), lwork-iwork+1, ierr )
1477 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1478*
1479* Generate Q in U
1480* (CWorkspace: need 2*N, prefer N+N*NB)
1481* (RWorkspace: 0)
1482*
1483 CALL cungqr( m, n, n, u, ldu, work( itau ),
1484 $ work( iwork ), lwork-iwork+1, ierr )
1485*
1486* Copy R to VT, zeroing out below it
1487*
1488 CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
1489 IF( n.GT.1 )
1490 $ CALL claset( 'L', n-1, n-1, czero, czero,
1491 $ vt( 2, 1 ), ldvt )
1492 ie = 1
1493 itauq = itau
1494 itaup = itauq + n
1495 iwork = itaup + n
1496*
1497* Bidiagonalize R in VT
1498* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1499* (RWorkspace: need N)
1500*
1501 CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
1502 $ work( itauq ), work( itaup ),
1503 $ work( iwork ), lwork-iwork+1, ierr )
1504*
1505* Multiply Q in U by left bidiagonalizing vectors
1506* in VT
1507* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1508* (RWorkspace: 0)
1509*
1510 CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1511 $ work( itauq ), u, ldu, work( iwork ),
1512 $ lwork-iwork+1, ierr )
1513*
1514* Generate right bidiagonalizing vectors in VT
1515* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1516* (RWorkspace: 0)
1517*
1518 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1519 $ work( iwork ), lwork-iwork+1, ierr )
1520 irwork = ie + n
1521*
1522* Perform bidiagonal QR iteration, computing left
1523* singular vectors of A in U and computing right
1524* singular vectors of A in VT
1525* (CWorkspace: 0)
1526* (RWorkspace: need BDSPAC)
1527*
1528 CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1529 $ ldvt, u, ldu, cdum, 1,
1530 $ rwork( irwork ), info )
1531*
1532 END IF
1533*
1534 END IF
1535*
1536 ELSE IF( wntua ) THEN
1537*
1538 IF( wntvn ) THEN
1539*
1540* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1541* M left singular vectors to be computed in U and
1542* no right singular vectors to be computed
1543*
1544 IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1545*
1546* Sufficient workspace for a fast algorithm
1547*
1548 ir = 1
1549 IF( lwork.GE.wrkbl+lda*n ) THEN
1550*
1551* WORK(IR) is LDA by N
1552*
1553 ldwrkr = lda
1554 ELSE
1555*
1556* WORK(IR) is N by N
1557*
1558 ldwrkr = n
1559 END IF
1560 itau = ir + ldwrkr*n
1561 iwork = itau + n
1562*
1563* Compute A=Q*R, copying result to U
1564* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1565* (RWorkspace: 0)
1566*
1567 CALL cgeqrf( m, n, a, lda, work( itau ),
1568 $ work( iwork ), lwork-iwork+1, ierr )
1569 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1570*
1571* Copy R to WORK(IR), zeroing out below it
1572*
1573 CALL clacpy( 'U', n, n, a, lda, work( ir ),
1574 $ ldwrkr )
1575 CALL claset( 'L', n-1, n-1, czero, czero,
1576 $ work( ir+1 ), ldwrkr )
1577*
1578* Generate Q in U
1579* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1580* (RWorkspace: 0)
1581*
1582 CALL cungqr( m, m, n, u, ldu, work( itau ),
1583 $ work( iwork ), lwork-iwork+1, ierr )
1584 ie = 1
1585 itauq = itau
1586 itaup = itauq + n
1587 iwork = itaup + n
1588*
1589* Bidiagonalize R in WORK(IR)
1590* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1591* (RWorkspace: need N)
1592*
1593 CALL cgebrd( n, n, work( ir ), ldwrkr, s,
1594 $ rwork( ie ), work( itauq ),
1595 $ work( itaup ), work( iwork ),
1596 $ lwork-iwork+1, ierr )
1597*
1598* Generate left bidiagonalizing vectors in WORK(IR)
1599* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1600* (RWorkspace: 0)
1601*
1602 CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1603 $ work( itauq ), work( iwork ),
1604 $ lwork-iwork+1, ierr )
1605 irwork = ie + n
1606*
1607* Perform bidiagonal QR iteration, computing left
1608* singular vectors of R in WORK(IR)
1609* (CWorkspace: need N*N)
1610* (RWorkspace: need BDSPAC)
1611*
1612 CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1613 $ 1, work( ir ), ldwrkr, cdum, 1,
1614 $ rwork( irwork ), info )
1615*
1616* Multiply Q in U by left singular vectors of R in
1617* WORK(IR), storing result in A
1618* (CWorkspace: need N*N)
1619* (RWorkspace: 0)
1620*
1621 CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1622 $ work( ir ), ldwrkr, czero, a, lda )
1623*
1624* Copy left singular vectors of A from A to U
1625*
1626 CALL clacpy( 'F', m, n, a, lda, u, ldu )
1627*
1628 ELSE
1629*
1630* Insufficient workspace for a fast algorithm
1631*
1632 itau = 1
1633 iwork = itau + n
1634*
1635* Compute A=Q*R, copying result to U
1636* (CWorkspace: need 2*N, prefer N+N*NB)
1637* (RWorkspace: 0)
1638*
1639 CALL cgeqrf( m, n, a, lda, work( itau ),
1640 $ work( iwork ), lwork-iwork+1, ierr )
1641 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1642*
1643* Generate Q in U
1644* (CWorkspace: need N+M, prefer N+M*NB)
1645* (RWorkspace: 0)
1646*
1647 CALL cungqr( m, m, n, u, ldu, work( itau ),
1648 $ work( iwork ), lwork-iwork+1, ierr )
1649 ie = 1
1650 itauq = itau
1651 itaup = itauq + n
1652 iwork = itaup + n
1653*
1654* Zero out below R in A
1655*
1656 IF( n .GT. 1 ) THEN
1657 CALL claset( 'L', n-1, n-1, czero, czero,
1658 $ a( 2, 1 ), lda )
1659 END IF
1660*
1661* Bidiagonalize R in A
1662* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1663* (RWorkspace: need N)
1664*
1665 CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1666 $ work( itauq ), work( itaup ),
1667 $ work( iwork ), lwork-iwork+1, ierr )
1668*
1669* Multiply Q in U by left bidiagonalizing vectors
1670* in A
1671* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1672* (RWorkspace: 0)
1673*
1674 CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1675 $ work( itauq ), u, ldu, work( iwork ),
1676 $ lwork-iwork+1, ierr )
1677 irwork = ie + n
1678*
1679* Perform bidiagonal QR iteration, computing left
1680* singular vectors of A in U
1681* (CWorkspace: 0)
1682* (RWorkspace: need BDSPAC)
1683*
1684 CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1685 $ 1, u, ldu, cdum, 1, rwork( irwork ),
1686 $ info )
1687*
1688 END IF
1689*
1690 ELSE IF( wntvo ) THEN
1691*
1692* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1693* M left singular vectors to be computed in U and
1694* N right singular vectors to be overwritten on A
1695*
1696 IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1697*
1698* Sufficient workspace for a fast algorithm
1699*
1700 iu = 1
1701 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1702*
1703* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1704*
1705 ldwrku = lda
1706 ir = iu + ldwrku*n
1707 ldwrkr = lda
1708 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1709*
1710* WORK(IU) is LDA by N and WORK(IR) is N by N
1711*
1712 ldwrku = lda
1713 ir = iu + ldwrku*n
1714 ldwrkr = n
1715 ELSE
1716*
1717* WORK(IU) is N by N and WORK(IR) is N by N
1718*
1719 ldwrku = n
1720 ir = iu + ldwrku*n
1721 ldwrkr = n
1722 END IF
1723 itau = ir + ldwrkr*n
1724 iwork = itau + n
1725*
1726* Compute A=Q*R, copying result to U
1727* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1728* (RWorkspace: 0)
1729*
1730 CALL cgeqrf( m, n, a, lda, work( itau ),
1731 $ work( iwork ), lwork-iwork+1, ierr )
1732 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1733*
1734* Generate Q in U
1735* (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1736* (RWorkspace: 0)
1737*
1738 CALL cungqr( m, m, n, u, ldu, work( itau ),
1739 $ work( iwork ), lwork-iwork+1, ierr )
1740*
1741* Copy R to WORK(IU), zeroing out below it
1742*
1743 CALL clacpy( 'U', n, n, a, lda, work( iu ),
1744 $ ldwrku )
1745 CALL claset( 'L', n-1, n-1, czero, czero,
1746 $ work( iu+1 ), ldwrku )
1747 ie = 1
1748 itauq = itau
1749 itaup = itauq + n
1750 iwork = itaup + n
1751*
1752* Bidiagonalize R in WORK(IU), copying result to
1753* WORK(IR)
1754* (CWorkspace: need 2*N*N+3*N,
1755* prefer 2*N*N+2*N+2*N*NB)
1756* (RWorkspace: need N)
1757*
1758 CALL cgebrd( n, n, work( iu ), ldwrku, s,
1759 $ rwork( ie ), work( itauq ),
1760 $ work( itaup ), work( iwork ),
1761 $ lwork-iwork+1, ierr )
1762 CALL clacpy( 'U', n, n, work( iu ), ldwrku,
1763 $ work( ir ), ldwrkr )
1764*
1765* Generate left bidiagonalizing vectors in WORK(IU)
1766* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1767* (RWorkspace: 0)
1768*
1769 CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1770 $ work( itauq ), work( iwork ),
1771 $ lwork-iwork+1, ierr )
1772*
1773* Generate right bidiagonalizing vectors in WORK(IR)
1774* (CWorkspace: need 2*N*N+3*N-1,
1775* prefer 2*N*N+2*N+(N-1)*NB)
1776* (RWorkspace: 0)
1777*
1778 CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
1779 $ work( itaup ), work( iwork ),
1780 $ lwork-iwork+1, ierr )
1781 irwork = ie + n
1782*
1783* Perform bidiagonal QR iteration, computing left
1784* singular vectors of R in WORK(IU) and computing
1785* right singular vectors of R in WORK(IR)
1786* (CWorkspace: need 2*N*N)
1787* (RWorkspace: need BDSPAC)
1788*
1789 CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1790 $ work( ir ), ldwrkr, work( iu ),
1791 $ ldwrku, cdum, 1, rwork( irwork ),
1792 $ info )
1793*
1794* Multiply Q in U by left singular vectors of R in
1795* WORK(IU), storing result in A
1796* (CWorkspace: need N*N)
1797* (RWorkspace: 0)
1798*
1799 CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1800 $ work( iu ), ldwrku, czero, a, lda )
1801*
1802* Copy left singular vectors of A from A to U
1803*
1804 CALL clacpy( 'F', m, n, a, lda, u, ldu )
1805*
1806* Copy right singular vectors of R from WORK(IR) to A
1807*
1808 CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
1809 $ lda )
1810*
1811 ELSE
1812*
1813* Insufficient workspace for a fast algorithm
1814*
1815 itau = 1
1816 iwork = itau + n
1817*
1818* Compute A=Q*R, copying result to U
1819* (CWorkspace: need 2*N, prefer N+N*NB)
1820* (RWorkspace: 0)
1821*
1822 CALL cgeqrf( m, n, a, lda, work( itau ),
1823 $ work( iwork ), lwork-iwork+1, ierr )
1824 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1825*
1826* Generate Q in U
1827* (CWorkspace: need N+M, prefer N+M*NB)
1828* (RWorkspace: 0)
1829*
1830 CALL cungqr( m, m, n, u, ldu, work( itau ),
1831 $ work( iwork ), lwork-iwork+1, ierr )
1832 ie = 1
1833 itauq = itau
1834 itaup = itauq + n
1835 iwork = itaup + n
1836*
1837* Zero out below R in A
1838*
1839 IF( n .GT. 1 ) THEN
1840 CALL claset( 'L', n-1, n-1, czero, czero,
1841 $ a( 2, 1 ), lda )
1842 END IF
1843*
1844* Bidiagonalize R in A
1845* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1846* (RWorkspace: need N)
1847*
1848 CALL cgebrd( n, n, a, lda, s, rwork( ie ),
1849 $ work( itauq ), work( itaup ),
1850 $ work( iwork ), lwork-iwork+1, ierr )
1851*
1852* Multiply Q in U by left bidiagonalizing vectors
1853* in A
1854* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1855* (RWorkspace: 0)
1856*
1857 CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1858 $ work( itauq ), u, ldu, work( iwork ),
1859 $ lwork-iwork+1, ierr )
1860*
1861* Generate right bidiagonalizing vectors in A
1862* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1863* (RWorkspace: 0)
1864*
1865 CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
1866 $ work( iwork ), lwork-iwork+1, ierr )
1867 irwork = ie + n
1868*
1869* Perform bidiagonal QR iteration, computing left
1870* singular vectors of A in U and computing right
1871* singular vectors of A in A
1872* (CWorkspace: 0)
1873* (RWorkspace: need BDSPAC)
1874*
1875 CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1876 $ lda, u, ldu, cdum, 1, rwork( irwork ),
1877 $ info )
1878*
1879 END IF
1880*
1881 ELSE IF( wntvas ) THEN
1882*
1883* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1884* or 'A')
1885* M left singular vectors to be computed in U and
1886* N right singular vectors to be computed in VT
1887*
1888 IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1889*
1890* Sufficient workspace for a fast algorithm
1891*
1892 iu = 1
1893 IF( lwork.GE.wrkbl+lda*n ) THEN
1894*
1895* WORK(IU) is LDA by N
1896*
1897 ldwrku = lda
1898 ELSE
1899*
1900* WORK(IU) is N by N
1901*
1902 ldwrku = n
1903 END IF
1904 itau = iu + ldwrku*n
1905 iwork = itau + n
1906*
1907* Compute A=Q*R, copying result to U
1908* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1909* (RWorkspace: 0)
1910*
1911 CALL cgeqrf( m, n, a, lda, work( itau ),
1912 $ work( iwork ), lwork-iwork+1, ierr )
1913 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1914*
1915* Generate Q in U
1916* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1917* (RWorkspace: 0)
1918*
1919 CALL cungqr( m, m, n, u, ldu, work( itau ),
1920 $ work( iwork ), lwork-iwork+1, ierr )
1921*
1922* Copy R to WORK(IU), zeroing out below it
1923*
1924 CALL clacpy( 'U', n, n, a, lda, work( iu ),
1925 $ ldwrku )
1926 CALL claset( 'L', n-1, n-1, czero, czero,
1927 $ work( iu+1 ), ldwrku )
1928 ie = 1
1929 itauq = itau
1930 itaup = itauq + n
1931 iwork = itaup + n
1932*
1933* Bidiagonalize R in WORK(IU), copying result to VT
1934* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1935* (RWorkspace: need N)
1936*
1937 CALL cgebrd( n, n, work( iu ), ldwrku, s,
1938 $ rwork( ie ), work( itauq ),
1939 $ work( itaup ), work( iwork ),
1940 $ lwork-iwork+1, ierr )
1941 CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
1942 $ ldvt )
1943*
1944* Generate left bidiagonalizing vectors in WORK(IU)
1945* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1946* (RWorkspace: 0)
1947*
1948 CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
1949 $ work( itauq ), work( iwork ),
1950 $ lwork-iwork+1, ierr )
1951*
1952* Generate right bidiagonalizing vectors in VT
1953* (CWorkspace: need N*N+3*N-1,
1954* prefer N*N+2*N+(N-1)*NB)
1955* (RWorkspace: need 0)
1956*
1957 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1958 $ work( iwork ), lwork-iwork+1, ierr )
1959 irwork = ie + n
1960*
1961* Perform bidiagonal QR iteration, computing left
1962* singular vectors of R in WORK(IU) and computing
1963* right singular vectors of R in VT
1964* (CWorkspace: need N*N)
1965* (RWorkspace: need BDSPAC)
1966*
1967 CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1968 $ ldvt, work( iu ), ldwrku, cdum, 1,
1969 $ rwork( irwork ), info )
1970*
1971* Multiply Q in U by left singular vectors of R in
1972* WORK(IU), storing result in A
1973* (CWorkspace: need N*N)
1974* (RWorkspace: 0)
1975*
1976 CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
1977 $ work( iu ), ldwrku, czero, a, lda )
1978*
1979* Copy left singular vectors of A from A to U
1980*
1981 CALL clacpy( 'F', m, n, a, lda, u, ldu )
1982*
1983 ELSE
1984*
1985* Insufficient workspace for a fast algorithm
1986*
1987 itau = 1
1988 iwork = itau + n
1989*
1990* Compute A=Q*R, copying result to U
1991* (CWorkspace: need 2*N, prefer N+N*NB)
1992* (RWorkspace: 0)
1993*
1994 CALL cgeqrf( m, n, a, lda, work( itau ),
1995 $ work( iwork ), lwork-iwork+1, ierr )
1996 CALL clacpy( 'L', m, n, a, lda, u, ldu )
1997*
1998* Generate Q in U
1999* (CWorkspace: need N+M, prefer N+M*NB)
2000* (RWorkspace: 0)
2001*
2002 CALL cungqr( m, m, n, u, ldu, work( itau ),
2003 $ work( iwork ), lwork-iwork+1, ierr )
2004*
2005* Copy R from A to VT, zeroing out below it
2006*
2007 CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2008 IF( n.GT.1 )
2009 $ CALL claset( 'L', n-1, n-1, czero, czero,
2010 $ vt( 2, 1 ), ldvt )
2011 ie = 1
2012 itauq = itau
2013 itaup = itauq + n
2014 iwork = itaup + n
2015*
2016* Bidiagonalize R in VT
2017* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2018* (RWorkspace: need N)
2019*
2020 CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
2021 $ work( itauq ), work( itaup ),
2022 $ work( iwork ), lwork-iwork+1, ierr )
2023*
2024* Multiply Q in U by left bidiagonalizing vectors
2025* in VT
2026* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2027* (RWorkspace: 0)
2028*
2029 CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2030 $ work( itauq ), u, ldu, work( iwork ),
2031 $ lwork-iwork+1, ierr )
2032*
2033* Generate right bidiagonalizing vectors in VT
2034* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2035* (RWorkspace: 0)
2036*
2037 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2038 $ work( iwork ), lwork-iwork+1, ierr )
2039 irwork = ie + n
2040*
2041* Perform bidiagonal QR iteration, computing left
2042* singular vectors of A in U and computing right
2043* singular vectors of A in VT
2044* (CWorkspace: 0)
2045* (RWorkspace: need BDSPAC)
2046*
2047 CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2048 $ ldvt, u, ldu, cdum, 1,
2049 $ rwork( irwork ), info )
2050*
2051 END IF
2052*
2053 END IF
2054*
2055 END IF
2056*
2057 ELSE
2058*
2059* M .LT. MNTHR
2060*
2061* Path 10 (M at least N, but not much larger)
2062* Reduce to bidiagonal form without QR decomposition
2063*
2064 ie = 1
2065 itauq = 1
2066 itaup = itauq + n
2067 iwork = itaup + n
2068*
2069* Bidiagonalize A
2070* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2071* (RWorkspace: need N)
2072*
2073 CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2074 $ work( itaup ), work( iwork ), lwork-iwork+1,
2075 $ ierr )
2076 IF( wntuas ) THEN
2077*
2078* If left singular vectors desired in U, copy result to U
2079* and generate left bidiagonalizing vectors in U
2080* (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2081* (RWorkspace: 0)
2082*
2083 CALL clacpy( 'L', m, n, a, lda, u, ldu )
2084 IF( wntus )
2085 $ ncu = n
2086 IF( wntua )
2087 $ ncu = m
2088 CALL cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2089 $ work( iwork ), lwork-iwork+1, ierr )
2090 END IF
2091 IF( wntvas ) THEN
2092*
2093* If right singular vectors desired in VT, copy result to
2094* VT and generate right bidiagonalizing vectors in VT
2095* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2096* (RWorkspace: 0)
2097*
2098 CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
2099 CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2100 $ work( iwork ), lwork-iwork+1, ierr )
2101 END IF
2102 IF( wntuo ) THEN
2103*
2104* If left singular vectors desired in A, generate left
2105* bidiagonalizing vectors in A
2106* (CWorkspace: need 3*N, prefer 2*N+N*NB)
2107* (RWorkspace: 0)
2108*
2109 CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
2110 $ work( iwork ), lwork-iwork+1, ierr )
2111 END IF
2112 IF( wntvo ) THEN
2113*
2114* If right singular vectors desired in A, generate right
2115* bidiagonalizing vectors in A
2116* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2117* (RWorkspace: 0)
2118*
2119 CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
2120 $ work( iwork ), lwork-iwork+1, ierr )
2121 END IF
2122 irwork = ie + n
2123 IF( wntuas .OR. wntuo )
2124 $ nru = m
2125 IF( wntun )
2126 $ nru = 0
2127 IF( wntvas .OR. wntvo )
2128 $ ncvt = n
2129 IF( wntvn )
2130 $ ncvt = 0
2131 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2132*
2133* Perform bidiagonal QR iteration, if desired, computing
2134* left singular vectors in U and computing right singular
2135* vectors in VT
2136* (CWorkspace: 0)
2137* (RWorkspace: need BDSPAC)
2138*
2139 CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2140 $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2141 $ info )
2142 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2143*
2144* Perform bidiagonal QR iteration, if desired, computing
2145* left singular vectors in U and computing right singular
2146* vectors in A
2147* (CWorkspace: 0)
2148* (RWorkspace: need BDSPAC)
2149*
2150 CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2151 $ lda, u, ldu, cdum, 1, rwork( irwork ),
2152 $ info )
2153 ELSE
2154*
2155* Perform bidiagonal QR iteration, if desired, computing
2156* left singular vectors in A and computing right singular
2157* vectors in VT
2158* (CWorkspace: 0)
2159* (RWorkspace: need BDSPAC)
2160*
2161 CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2162 $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2163 $ info )
2164 END IF
2165*
2166 END IF
2167*
2168 ELSE
2169*
2170* A has more columns than rows. If A has sufficiently more
2171* columns than rows, first reduce using the LQ decomposition (if
2172* sufficient workspace available)
2173*
2174 IF( n.GE.mnthr ) THEN
2175*
2176 IF( wntvn ) THEN
2177*
2178* Path 1t(N much larger than M, JOBVT='N')
2179* No right singular vectors to be computed
2180*
2181 itau = 1
2182 iwork = itau + m
2183*
2184* Compute A=L*Q
2185* (CWorkspace: need 2*M, prefer M+M*NB)
2186* (RWorkspace: 0)
2187*
2188 CALL cgelqf( m, n, a, lda, work( itau ), work( iwork ),
2189 $ lwork-iwork+1, ierr )
2190*
2191* Zero out above L
2192*
2193 CALL claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2194 $ lda )
2195 ie = 1
2196 itauq = 1
2197 itaup = itauq + m
2198 iwork = itaup + m
2199*
2200* Bidiagonalize L in A
2201* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2202* (RWorkspace: need M)
2203*
2204 CALL cgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2205 $ work( itaup ), work( iwork ), lwork-iwork+1,
2206 $ ierr )
2207 IF( wntuo .OR. wntuas ) THEN
2208*
2209* If left singular vectors desired, generate Q
2210* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2211* (RWorkspace: 0)
2212*
2213 CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2214 $ work( iwork ), lwork-iwork+1, ierr )
2215 END IF
2216 irwork = ie + m
2217 nru = 0
2218 IF( wntuo .OR. wntuas )
2219 $ nru = m
2220*
2221* Perform bidiagonal QR iteration, computing left singular
2222* vectors of A in A if desired
2223* (CWorkspace: 0)
2224* (RWorkspace: need BDSPAC)
2225*
2226 CALL cbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2227 $ a, lda, cdum, 1, rwork( irwork ), info )
2228*
2229* If left singular vectors desired in U, copy them there
2230*
2231 IF( wntuas )
2232 $ CALL clacpy( 'F', m, m, a, lda, u, ldu )
2233*
2234 ELSE IF( wntvo .AND. wntun ) THEN
2235*
2236* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2237* M right singular vectors to be overwritten on A and
2238* no left singular vectors to be computed
2239*
2240 IF( lwork.GE.m*m+3*m ) THEN
2241*
2242* Sufficient workspace for a fast algorithm
2243*
2244 ir = 1
2245 IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2246*
2247* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2248*
2249 ldwrku = lda
2250 chunk = n
2251 ldwrkr = lda
2252 ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2253*
2254* WORK(IU) is LDA by N and WORK(IR) is M by M
2255*
2256 ldwrku = lda
2257 chunk = n
2258 ldwrkr = m
2259 ELSE
2260*
2261* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2262*
2263 ldwrku = m
2264 chunk = ( lwork-m*m ) / m
2265 ldwrkr = m
2266 END IF
2267 itau = ir + ldwrkr*m
2268 iwork = itau + m
2269*
2270* Compute A=L*Q
2271* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2272* (RWorkspace: 0)
2273*
2274 CALL cgelqf( m, n, a, lda, work( itau ),
2275 $ work( iwork ), lwork-iwork+1, ierr )
2276*
2277* Copy L to WORK(IR) and zero out above it
2278*
2279 CALL clacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2280 CALL claset( 'U', m-1, m-1, czero, czero,
2281 $ work( ir+ldwrkr ), ldwrkr )
2282*
2283* Generate Q in A
2284* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2285* (RWorkspace: 0)
2286*
2287 CALL cunglq( m, n, m, a, lda, work( itau ),
2288 $ work( iwork ), lwork-iwork+1, ierr )
2289 ie = 1
2290 itauq = itau
2291 itaup = itauq + m
2292 iwork = itaup + m
2293*
2294* Bidiagonalize L in WORK(IR)
2295* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2296* (RWorkspace: need M)
2297*
2298 CALL cgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2299 $ work( itauq ), work( itaup ),
2300 $ work( iwork ), lwork-iwork+1, ierr )
2301*
2302* Generate right vectors bidiagonalizing L
2303* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2304* (RWorkspace: 0)
2305*
2306 CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2307 $ work( itaup ), work( iwork ),
2308 $ lwork-iwork+1, ierr )
2309 irwork = ie + m
2310*
2311* Perform bidiagonal QR iteration, computing right
2312* singular vectors of L in WORK(IR)
2313* (CWorkspace: need M*M)
2314* (RWorkspace: need BDSPAC)
2315*
2316 CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2317 $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2318 $ rwork( irwork ), info )
2319 iu = itauq
2320*
2321* Multiply right singular vectors of L in WORK(IR) by Q
2322* in A, storing result in WORK(IU) and copying to A
2323* (CWorkspace: need M*M+M, prefer M*M+M*N)
2324* (RWorkspace: 0)
2325*
2326 DO 30 i = 1, n, chunk
2327 blk = min( n-i+1, chunk )
2328 CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2329 $ ldwrkr, a( 1, i ), lda, czero,
2330 $ work( iu ), ldwrku )
2331 CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2332 $ a( 1, i ), lda )
2333 30 CONTINUE
2334*
2335 ELSE
2336*
2337* Insufficient workspace for a fast algorithm
2338*
2339 ie = 1
2340 itauq = 1
2341 itaup = itauq + m
2342 iwork = itaup + m
2343*
2344* Bidiagonalize A
2345* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2346* (RWorkspace: need M)
2347*
2348 CALL cgebrd( m, n, a, lda, s, rwork( ie ),
2349 $ work( itauq ), work( itaup ),
2350 $ work( iwork ), lwork-iwork+1, ierr )
2351*
2352* Generate right vectors bidiagonalizing A
2353* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2354* (RWorkspace: 0)
2355*
2356 CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
2357 $ work( iwork ), lwork-iwork+1, ierr )
2358 irwork = ie + m
2359*
2360* Perform bidiagonal QR iteration, computing right
2361* singular vectors of A in A
2362* (CWorkspace: 0)
2363* (RWorkspace: need BDSPAC)
2364*
2365 CALL cbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2366 $ cdum, 1, cdum, 1, rwork( irwork ), info )
2367*
2368 END IF
2369*
2370 ELSE IF( wntvo .AND. wntuas ) THEN
2371*
2372* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2373* M right singular vectors to be overwritten on A and
2374* M left singular vectors to be computed in U
2375*
2376 IF( lwork.GE.m*m+3*m ) THEN
2377*
2378* Sufficient workspace for a fast algorithm
2379*
2380 ir = 1
2381 IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2382*
2383* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2384*
2385 ldwrku = lda
2386 chunk = n
2387 ldwrkr = lda
2388 ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2389*
2390* WORK(IU) is LDA by N and WORK(IR) is M by M
2391*
2392 ldwrku = lda
2393 chunk = n
2394 ldwrkr = m
2395 ELSE
2396*
2397* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2398*
2399 ldwrku = m
2400 chunk = ( lwork-m*m ) / m
2401 ldwrkr = m
2402 END IF
2403 itau = ir + ldwrkr*m
2404 iwork = itau + m
2405*
2406* Compute A=L*Q
2407* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2408* (RWorkspace: 0)
2409*
2410 CALL cgelqf( m, n, a, lda, work( itau ),
2411 $ work( iwork ), lwork-iwork+1, ierr )
2412*
2413* Copy L to U, zeroing about above it
2414*
2415 CALL clacpy( 'L', m, m, a, lda, u, ldu )
2416 CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2417 $ ldu )
2418*
2419* Generate Q in A
2420* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2421* (RWorkspace: 0)
2422*
2423 CALL cunglq( m, n, m, a, lda, work( itau ),
2424 $ work( iwork ), lwork-iwork+1, ierr )
2425 ie = 1
2426 itauq = itau
2427 itaup = itauq + m
2428 iwork = itaup + m
2429*
2430* Bidiagonalize L in U, copying result to WORK(IR)
2431* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2432* (RWorkspace: need M)
2433*
2434 CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2435 $ work( itauq ), work( itaup ),
2436 $ work( iwork ), lwork-iwork+1, ierr )
2437 CALL clacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2438*
2439* Generate right vectors bidiagonalizing L in WORK(IR)
2440* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2441* (RWorkspace: 0)
2442*
2443 CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2444 $ work( itaup ), work( iwork ),
2445 $ lwork-iwork+1, ierr )
2446*
2447* Generate left vectors bidiagonalizing L in U
2448* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2449* (RWorkspace: 0)
2450*
2451 CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2452 $ work( iwork ), lwork-iwork+1, ierr )
2453 irwork = ie + m
2454*
2455* Perform bidiagonal QR iteration, computing left
2456* singular vectors of L in U, and computing right
2457* singular vectors of L in WORK(IR)
2458* (CWorkspace: need M*M)
2459* (RWorkspace: need BDSPAC)
2460*
2461 CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2462 $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2463 $ rwork( irwork ), info )
2464 iu = itauq
2465*
2466* Multiply right singular vectors of L in WORK(IR) by Q
2467* in A, storing result in WORK(IU) and copying to A
2468* (CWorkspace: need M*M+M, prefer M*M+M*N))
2469* (RWorkspace: 0)
2470*
2471 DO 40 i = 1, n, chunk
2472 blk = min( n-i+1, chunk )
2473 CALL cgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2474 $ ldwrkr, a( 1, i ), lda, czero,
2475 $ work( iu ), ldwrku )
2476 CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
2477 $ a( 1, i ), lda )
2478 40 CONTINUE
2479*
2480 ELSE
2481*
2482* Insufficient workspace for a fast algorithm
2483*
2484 itau = 1
2485 iwork = itau + m
2486*
2487* Compute A=L*Q
2488* (CWorkspace: need 2*M, prefer M+M*NB)
2489* (RWorkspace: 0)
2490*
2491 CALL cgelqf( m, n, a, lda, work( itau ),
2492 $ work( iwork ), lwork-iwork+1, ierr )
2493*
2494* Copy L to U, zeroing out above it
2495*
2496 CALL clacpy( 'L', m, m, a, lda, u, ldu )
2497 CALL claset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2498 $ ldu )
2499*
2500* Generate Q in A
2501* (CWorkspace: need 2*M, prefer M+M*NB)
2502* (RWorkspace: 0)
2503*
2504 CALL cunglq( m, n, m, a, lda, work( itau ),
2505 $ work( iwork ), lwork-iwork+1, ierr )
2506 ie = 1
2507 itauq = itau
2508 itaup = itauq + m
2509 iwork = itaup + m
2510*
2511* Bidiagonalize L in U
2512* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2513* (RWorkspace: need M)
2514*
2515 CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
2516 $ work( itauq ), work( itaup ),
2517 $ work( iwork ), lwork-iwork+1, ierr )
2518*
2519* Multiply right vectors bidiagonalizing L by Q in A
2520* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2521* (RWorkspace: 0)
2522*
2523 CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2524 $ work( itaup ), a, lda, work( iwork ),
2525 $ lwork-iwork+1, ierr )
2526*
2527* Generate left vectors bidiagonalizing L in U
2528* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2529* (RWorkspace: 0)
2530*
2531 CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2532 $ work( iwork ), lwork-iwork+1, ierr )
2533 irwork = ie + m
2534*
2535* Perform bidiagonal QR iteration, computing left
2536* singular vectors of A in U and computing right
2537* singular vectors of A in A
2538* (CWorkspace: 0)
2539* (RWorkspace: need BDSPAC)
2540*
2541 CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2542 $ u, ldu, cdum, 1, rwork( irwork ), info )
2543*
2544 END IF
2545*
2546 ELSE IF( wntvs ) THEN
2547*
2548 IF( wntun ) THEN
2549*
2550* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2551* M right singular vectors to be computed in VT and
2552* no left singular vectors to be computed
2553*
2554 IF( lwork.GE.m*m+3*m ) THEN
2555*
2556* Sufficient workspace for a fast algorithm
2557*
2558 ir = 1
2559 IF( lwork.GE.wrkbl+lda*m ) THEN
2560*
2561* WORK(IR) is LDA by M
2562*
2563 ldwrkr = lda
2564 ELSE
2565*
2566* WORK(IR) is M by M
2567*
2568 ldwrkr = m
2569 END IF
2570 itau = ir + ldwrkr*m
2571 iwork = itau + m
2572*
2573* Compute A=L*Q
2574* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2575* (RWorkspace: 0)
2576*
2577 CALL cgelqf( m, n, a, lda, work( itau ),
2578 $ work( iwork ), lwork-iwork+1, ierr )
2579*
2580* Copy L to WORK(IR), zeroing out above it
2581*
2582 CALL clacpy( 'L', m, m, a, lda, work( ir ),
2583 $ ldwrkr )
2584 CALL claset( 'U', m-1, m-1, czero, czero,
2585 $ work( ir+ldwrkr ), ldwrkr )
2586*
2587* Generate Q in A
2588* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2589* (RWorkspace: 0)
2590*
2591 CALL cunglq( m, n, m, a, lda, work( itau ),
2592 $ work( iwork ), lwork-iwork+1, ierr )
2593 ie = 1
2594 itauq = itau
2595 itaup = itauq + m
2596 iwork = itaup + m
2597*
2598* Bidiagonalize L in WORK(IR)
2599* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2600* (RWorkspace: need M)
2601*
2602 CALL cgebrd( m, m, work( ir ), ldwrkr, s,
2603 $ rwork( ie ), work( itauq ),
2604 $ work( itaup ), work( iwork ),
2605 $ lwork-iwork+1, ierr )
2606*
2607* Generate right vectors bidiagonalizing L in
2608* WORK(IR)
2609* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2610* (RWorkspace: 0)
2611*
2612 CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
2613 $ work( itaup ), work( iwork ),
2614 $ lwork-iwork+1, ierr )
2615 irwork = ie + m
2616*
2617* Perform bidiagonal QR iteration, computing right
2618* singular vectors of L in WORK(IR)
2619* (CWorkspace: need M*M)
2620* (RWorkspace: need BDSPAC)
2621*
2622 CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2623 $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2624 $ rwork( irwork ), info )
2625*
2626* Multiply right singular vectors of L in WORK(IR) by
2627* Q in A, storing result in VT
2628* (CWorkspace: need M*M)
2629* (RWorkspace: 0)
2630*
2631 CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
2632 $ ldwrkr, a, lda, czero, vt, ldvt )
2633*
2634 ELSE
2635*
2636* Insufficient workspace for a fast algorithm
2637*
2638 itau = 1
2639 iwork = itau + m
2640*
2641* Compute A=L*Q
2642* (CWorkspace: need 2*M, prefer M+M*NB)
2643* (RWorkspace: 0)
2644*
2645 CALL cgelqf( m, n, a, lda, work( itau ),
2646 $ work( iwork ), lwork-iwork+1, ierr )
2647*
2648* Copy result to VT
2649*
2650 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2651*
2652* Generate Q in VT
2653* (CWorkspace: need 2*M, prefer M+M*NB)
2654* (RWorkspace: 0)
2655*
2656 CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2657 $ work( iwork ), lwork-iwork+1, ierr )
2658 ie = 1
2659 itauq = itau
2660 itaup = itauq + m
2661 iwork = itaup + m
2662*
2663* Zero out above L in A
2664*
2665 CALL claset( 'U', m-1, m-1, czero, czero,
2666 $ a( 1, 2 ), lda )
2667*
2668* Bidiagonalize L in A
2669* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2670* (RWorkspace: need M)
2671*
2672 CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2673 $ work( itauq ), work( itaup ),
2674 $ work( iwork ), lwork-iwork+1, ierr )
2675*
2676* Multiply right vectors bidiagonalizing L by Q in VT
2677* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2678* (RWorkspace: 0)
2679*
2680 CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2681 $ work( itaup ), vt, ldvt,
2682 $ work( iwork ), lwork-iwork+1, ierr )
2683 irwork = ie + m
2684*
2685* Perform bidiagonal QR iteration, computing right
2686* singular vectors of A in VT
2687* (CWorkspace: 0)
2688* (RWorkspace: need BDSPAC)
2689*
2690 CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2691 $ ldvt, cdum, 1, cdum, 1,
2692 $ rwork( irwork ), info )
2693*
2694 END IF
2695*
2696 ELSE IF( wntuo ) THEN
2697*
2698* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2699* M right singular vectors to be computed in VT and
2700* M left singular vectors to be overwritten on A
2701*
2702 IF( lwork.GE.2*m*m+3*m ) THEN
2703*
2704* Sufficient workspace for a fast algorithm
2705*
2706 iu = 1
2707 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2708*
2709* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2710*
2711 ldwrku = lda
2712 ir = iu + ldwrku*m
2713 ldwrkr = lda
2714 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2715*
2716* WORK(IU) is LDA by M and WORK(IR) is M by M
2717*
2718 ldwrku = lda
2719 ir = iu + ldwrku*m
2720 ldwrkr = m
2721 ELSE
2722*
2723* WORK(IU) is M by M and WORK(IR) is M by M
2724*
2725 ldwrku = m
2726 ir = iu + ldwrku*m
2727 ldwrkr = m
2728 END IF
2729 itau = ir + ldwrkr*m
2730 iwork = itau + m
2731*
2732* Compute A=L*Q
2733* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2734* (RWorkspace: 0)
2735*
2736 CALL cgelqf( m, n, a, lda, work( itau ),
2737 $ work( iwork ), lwork-iwork+1, ierr )
2738*
2739* Copy L to WORK(IU), zeroing out below it
2740*
2741 CALL clacpy( 'L', m, m, a, lda, work( iu ),
2742 $ ldwrku )
2743 CALL claset( 'U', m-1, m-1, czero, czero,
2744 $ work( iu+ldwrku ), ldwrku )
2745*
2746* Generate Q in A
2747* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2748* (RWorkspace: 0)
2749*
2750 CALL cunglq( m, n, m, a, lda, work( itau ),
2751 $ work( iwork ), lwork-iwork+1, ierr )
2752 ie = 1
2753 itauq = itau
2754 itaup = itauq + m
2755 iwork = itaup + m
2756*
2757* Bidiagonalize L in WORK(IU), copying result to
2758* WORK(IR)
2759* (CWorkspace: need 2*M*M+3*M,
2760* prefer 2*M*M+2*M+2*M*NB)
2761* (RWorkspace: need M)
2762*
2763 CALL cgebrd( m, m, work( iu ), ldwrku, s,
2764 $ rwork( ie ), work( itauq ),
2765 $ work( itaup ), work( iwork ),
2766 $ lwork-iwork+1, ierr )
2767 CALL clacpy( 'L', m, m, work( iu ), ldwrku,
2768 $ work( ir ), ldwrkr )
2769*
2770* Generate right bidiagonalizing vectors in WORK(IU)
2771* (CWorkspace: need 2*M*M+3*M-1,
2772* prefer 2*M*M+2*M+(M-1)*NB)
2773* (RWorkspace: 0)
2774*
2775 CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2776 $ work( itaup ), work( iwork ),
2777 $ lwork-iwork+1, ierr )
2778*
2779* Generate left bidiagonalizing vectors in WORK(IR)
2780* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2781* (RWorkspace: 0)
2782*
2783 CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2784 $ work( itauq ), work( iwork ),
2785 $ lwork-iwork+1, ierr )
2786 irwork = ie + m
2787*
2788* Perform bidiagonal QR iteration, computing left
2789* singular vectors of L in WORK(IR) and computing
2790* right singular vectors of L in WORK(IU)
2791* (CWorkspace: need 2*M*M)
2792* (RWorkspace: need BDSPAC)
2793*
2794 CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2795 $ work( iu ), ldwrku, work( ir ),
2796 $ ldwrkr, cdum, 1, rwork( irwork ),
2797 $ info )
2798*
2799* Multiply right singular vectors of L in WORK(IU) by
2800* Q in A, storing result in VT
2801* (CWorkspace: need M*M)
2802* (RWorkspace: 0)
2803*
2804 CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2805 $ ldwrku, a, lda, czero, vt, ldvt )
2806*
2807* Copy left singular vectors of L to A
2808* (CWorkspace: need M*M)
2809* (RWorkspace: 0)
2810*
2811 CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
2812 $ lda )
2813*
2814 ELSE
2815*
2816* Insufficient workspace for a fast algorithm
2817*
2818 itau = 1
2819 iwork = itau + m
2820*
2821* Compute A=L*Q, copying result to VT
2822* (CWorkspace: need 2*M, prefer M+M*NB)
2823* (RWorkspace: 0)
2824*
2825 CALL cgelqf( m, n, a, lda, work( itau ),
2826 $ work( iwork ), lwork-iwork+1, ierr )
2827 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2828*
2829* Generate Q in VT
2830* (CWorkspace: need 2*M, prefer M+M*NB)
2831* (RWorkspace: 0)
2832*
2833 CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2834 $ work( iwork ), lwork-iwork+1, ierr )
2835 ie = 1
2836 itauq = itau
2837 itaup = itauq + m
2838 iwork = itaup + m
2839*
2840* Zero out above L in A
2841*
2842 CALL claset( 'U', m-1, m-1, czero, czero,
2843 $ a( 1, 2 ), lda )
2844*
2845* Bidiagonalize L in A
2846* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2847* (RWorkspace: need M)
2848*
2849 CALL cgebrd( m, m, a, lda, s, rwork( ie ),
2850 $ work( itauq ), work( itaup ),
2851 $ work( iwork ), lwork-iwork+1, ierr )
2852*
2853* Multiply right vectors bidiagonalizing L by Q in VT
2854* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2855* (RWorkspace: 0)
2856*
2857 CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2858 $ work( itaup ), vt, ldvt,
2859 $ work( iwork ), lwork-iwork+1, ierr )
2860*
2861* Generate left bidiagonalizing vectors of L in A
2862* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2863* (RWorkspace: 0)
2864*
2865 CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
2866 $ work( iwork ), lwork-iwork+1, ierr )
2867 irwork = ie + m
2868*
2869* Perform bidiagonal QR iteration, computing left
2870* singular vectors of A in A and computing right
2871* singular vectors of A in VT
2872* (CWorkspace: 0)
2873* (RWorkspace: need BDSPAC)
2874*
2875 CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2876 $ ldvt, a, lda, cdum, 1,
2877 $ rwork( irwork ), info )
2878*
2879 END IF
2880*
2881 ELSE IF( wntuas ) THEN
2882*
2883* Path 6t(N much larger than M, JOBU='S' or 'A',
2884* JOBVT='S')
2885* M right singular vectors to be computed in VT and
2886* M left singular vectors to be computed in U
2887*
2888 IF( lwork.GE.m*m+3*m ) THEN
2889*
2890* Sufficient workspace for a fast algorithm
2891*
2892 iu = 1
2893 IF( lwork.GE.wrkbl+lda*m ) THEN
2894*
2895* WORK(IU) is LDA by N
2896*
2897 ldwrku = lda
2898 ELSE
2899*
2900* WORK(IU) is LDA by M
2901*
2902 ldwrku = m
2903 END IF
2904 itau = iu + ldwrku*m
2905 iwork = itau + m
2906*
2907* Compute A=L*Q
2908* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2909* (RWorkspace: 0)
2910*
2911 CALL cgelqf( m, n, a, lda, work( itau ),
2912 $ work( iwork ), lwork-iwork+1, ierr )
2913*
2914* Copy L to WORK(IU), zeroing out above it
2915*
2916 CALL clacpy( 'L', m, m, a, lda, work( iu ),
2917 $ ldwrku )
2918 CALL claset( 'U', m-1, m-1, czero, czero,
2919 $ work( iu+ldwrku ), ldwrku )
2920*
2921* Generate Q in A
2922* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2923* (RWorkspace: 0)
2924*
2925 CALL cunglq( m, n, m, a, lda, work( itau ),
2926 $ work( iwork ), lwork-iwork+1, ierr )
2927 ie = 1
2928 itauq = itau
2929 itaup = itauq + m
2930 iwork = itaup + m
2931*
2932* Bidiagonalize L in WORK(IU), copying result to U
2933* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2934* (RWorkspace: need M)
2935*
2936 CALL cgebrd( m, m, work( iu ), ldwrku, s,
2937 $ rwork( ie ), work( itauq ),
2938 $ work( itaup ), work( iwork ),
2939 $ lwork-iwork+1, ierr )
2940 CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
2941 $ ldu )
2942*
2943* Generate right bidiagonalizing vectors in WORK(IU)
2944* (CWorkspace: need M*M+3*M-1,
2945* prefer M*M+2*M+(M-1)*NB)
2946* (RWorkspace: 0)
2947*
2948 CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
2949 $ work( itaup ), work( iwork ),
2950 $ lwork-iwork+1, ierr )
2951*
2952* Generate left bidiagonalizing vectors in U
2953* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2954* (RWorkspace: 0)
2955*
2956 CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2957 $ work( iwork ), lwork-iwork+1, ierr )
2958 irwork = ie + m
2959*
2960* Perform bidiagonal QR iteration, computing left
2961* singular vectors of L in U and computing right
2962* singular vectors of L in WORK(IU)
2963* (CWorkspace: need M*M)
2964* (RWorkspace: need BDSPAC)
2965*
2966 CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2967 $ work( iu ), ldwrku, u, ldu, cdum, 1,
2968 $ rwork( irwork ), info )
2969*
2970* Multiply right singular vectors of L in WORK(IU) by
2971* Q in A, storing result in VT
2972* (CWorkspace: need M*M)
2973* (RWorkspace: 0)
2974*
2975 CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
2976 $ ldwrku, a, lda, czero, vt, ldvt )
2977*
2978 ELSE
2979*
2980* Insufficient workspace for a fast algorithm
2981*
2982 itau = 1
2983 iwork = itau + m
2984*
2985* Compute A=L*Q, copying result to VT
2986* (CWorkspace: need 2*M, prefer M+M*NB)
2987* (RWorkspace: 0)
2988*
2989 CALL cgelqf( m, n, a, lda, work( itau ),
2990 $ work( iwork ), lwork-iwork+1, ierr )
2991 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
2992*
2993* Generate Q in VT
2994* (CWorkspace: need 2*M, prefer M+M*NB)
2995* (RWorkspace: 0)
2996*
2997 CALL cunglq( m, n, m, vt, ldvt, work( itau ),
2998 $ work( iwork ), lwork-iwork+1, ierr )
2999*
3000* Copy L to U, zeroing out above it
3001*
3002 CALL clacpy( 'L', m, m, a, lda, u, ldu )
3003 CALL claset( 'U', m-1, m-1, czero, czero,
3004 $ u( 1, 2 ), ldu )
3005 ie = 1
3006 itauq = itau
3007 itaup = itauq + m
3008 iwork = itaup + m
3009*
3010* Bidiagonalize L in U
3011* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3012* (RWorkspace: need M)
3013*
3014 CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3015 $ work( itauq ), work( itaup ),
3016 $ work( iwork ), lwork-iwork+1, ierr )
3017*
3018* Multiply right bidiagonalizing vectors in U by Q
3019* in VT
3020* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3021* (RWorkspace: 0)
3022*
3023 CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3024 $ work( itaup ), vt, ldvt,
3025 $ work( iwork ), lwork-iwork+1, ierr )
3026*
3027* Generate left bidiagonalizing vectors in U
3028* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3029* (RWorkspace: 0)
3030*
3031 CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3032 $ work( iwork ), lwork-iwork+1, ierr )
3033 irwork = ie + m
3034*
3035* Perform bidiagonal QR iteration, computing left
3036* singular vectors of A in U and computing right
3037* singular vectors of A in VT
3038* (CWorkspace: 0)
3039* (RWorkspace: need BDSPAC)
3040*
3041 CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3042 $ ldvt, u, ldu, cdum, 1,
3043 $ rwork( irwork ), info )
3044*
3045 END IF
3046*
3047 END IF
3048*
3049 ELSE IF( wntva ) THEN
3050*
3051 IF( wntun ) THEN
3052*
3053* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3054* N right singular vectors to be computed in VT and
3055* no left singular vectors to be computed
3056*
3057 IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3058*
3059* Sufficient workspace for a fast algorithm
3060*
3061 ir = 1
3062 IF( lwork.GE.wrkbl+lda*m ) THEN
3063*
3064* WORK(IR) is LDA by M
3065*
3066 ldwrkr = lda
3067 ELSE
3068*
3069* WORK(IR) is M by M
3070*
3071 ldwrkr = m
3072 END IF
3073 itau = ir + ldwrkr*m
3074 iwork = itau + m
3075*
3076* Compute A=L*Q, copying result to VT
3077* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3078* (RWorkspace: 0)
3079*
3080 CALL cgelqf( m, n, a, lda, work( itau ),
3081 $ work( iwork ), lwork-iwork+1, ierr )
3082 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3083*
3084* Copy L to WORK(IR), zeroing out above it
3085*
3086 CALL clacpy( 'L', m, m, a, lda, work( ir ),
3087 $ ldwrkr )
3088 CALL claset( 'U', m-1, m-1, czero, czero,
3089 $ work( ir+ldwrkr ), ldwrkr )
3090*
3091* Generate Q in VT
3092* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3093* (RWorkspace: 0)
3094*
3095 CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3096 $ work( iwork ), lwork-iwork+1, ierr )
3097 ie = 1
3098 itauq = itau
3099 itaup = itauq + m
3100 iwork = itaup + m
3101*
3102* Bidiagonalize L in WORK(IR)
3103* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3104* (RWorkspace: need M)
3105*
3106 CALL cgebrd( m, m, work( ir ), ldwrkr, s,
3107 $ rwork( ie ), work( itauq ),
3108 $ work( itaup ), work( iwork ),
3109 $ lwork-iwork+1, ierr )
3110*
3111* Generate right bidiagonalizing vectors in WORK(IR)
3112* (CWorkspace: need M*M+3*M-1,
3113* prefer M*M+2*M+(M-1)*NB)
3114* (RWorkspace: 0)
3115*
3116 CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
3117 $ work( itaup ), work( iwork ),
3118 $ lwork-iwork+1, ierr )
3119 irwork = ie + m
3120*
3121* Perform bidiagonal QR iteration, computing right
3122* singular vectors of L in WORK(IR)
3123* (CWorkspace: need M*M)
3124* (RWorkspace: need BDSPAC)
3125*
3126 CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
3127 $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
3128 $ rwork( irwork ), info )
3129*
3130* Multiply right singular vectors of L in WORK(IR) by
3131* Q in VT, storing result in A
3132* (CWorkspace: need M*M)
3133* (RWorkspace: 0)
3134*
3135 CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
3136 $ ldwrkr, vt, ldvt, czero, a, lda )
3137*
3138* Copy right singular vectors of A from A to VT
3139*
3140 CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3141*
3142 ELSE
3143*
3144* Insufficient workspace for a fast algorithm
3145*
3146 itau = 1
3147 iwork = itau + m
3148*
3149* Compute A=L*Q, copying result to VT
3150* (CWorkspace: need 2*M, prefer M+M*NB)
3151* (RWorkspace: 0)
3152*
3153 CALL cgelqf( m, n, a, lda, work( itau ),
3154 $ work( iwork ), lwork-iwork+1, ierr )
3155 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3156*
3157* Generate Q in VT
3158* (CWorkspace: need M+N, prefer M+N*NB)
3159* (RWorkspace: 0)
3160*
3161 CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3162 $ work( iwork ), lwork-iwork+1, ierr )
3163 ie = 1
3164 itauq = itau
3165 itaup = itauq + m
3166 iwork = itaup + m
3167*
3168* Zero out above L in A
3169*
3170 CALL claset( 'U', m-1, m-1, czero, czero,
3171 $ a( 1, 2 ), lda )
3172*
3173* Bidiagonalize L in A
3174* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3175* (RWorkspace: need M)
3176*
3177 CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3178 $ work( itauq ), work( itaup ),
3179 $ work( iwork ), lwork-iwork+1, ierr )
3180*
3181* Multiply right bidiagonalizing vectors in A by Q
3182* in VT
3183* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3184* (RWorkspace: 0)
3185*
3186 CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3187 $ work( itaup ), vt, ldvt,
3188 $ work( iwork ), lwork-iwork+1, ierr )
3189 irwork = ie + m
3190*
3191* Perform bidiagonal QR iteration, computing right
3192* singular vectors of A in VT
3193* (CWorkspace: 0)
3194* (RWorkspace: need BDSPAC)
3195*
3196 CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
3197 $ ldvt, cdum, 1, cdum, 1,
3198 $ rwork( irwork ), info )
3199*
3200 END IF
3201*
3202 ELSE IF( wntuo ) THEN
3203*
3204* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3205* N right singular vectors to be computed in VT and
3206* M left singular vectors to be overwritten on A
3207*
3208 IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
3209*
3210* Sufficient workspace for a fast algorithm
3211*
3212 iu = 1
3213 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3214*
3215* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3216*
3217 ldwrku = lda
3218 ir = iu + ldwrku*m
3219 ldwrkr = lda
3220 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3221*
3222* WORK(IU) is LDA by M and WORK(IR) is M by M
3223*
3224 ldwrku = lda
3225 ir = iu + ldwrku*m
3226 ldwrkr = m
3227 ELSE
3228*
3229* WORK(IU) is M by M and WORK(IR) is M by M
3230*
3231 ldwrku = m
3232 ir = iu + ldwrku*m
3233 ldwrkr = m
3234 END IF
3235 itau = ir + ldwrkr*m
3236 iwork = itau + m
3237*
3238* Compute A=L*Q, copying result to VT
3239* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3240* (RWorkspace: 0)
3241*
3242 CALL cgelqf( m, n, a, lda, work( itau ),
3243 $ work( iwork ), lwork-iwork+1, ierr )
3244 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3245*
3246* Generate Q in VT
3247* (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3248* (RWorkspace: 0)
3249*
3250 CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3251 $ work( iwork ), lwork-iwork+1, ierr )
3252*
3253* Copy L to WORK(IU), zeroing out above it
3254*
3255 CALL clacpy( 'L', m, m, a, lda, work( iu ),
3256 $ ldwrku )
3257 CALL claset( 'U', m-1, m-1, czero, czero,
3258 $ work( iu+ldwrku ), ldwrku )
3259 ie = 1
3260 itauq = itau
3261 itaup = itauq + m
3262 iwork = itaup + m
3263*
3264* Bidiagonalize L in WORK(IU), copying result to
3265* WORK(IR)
3266* (CWorkspace: need 2*M*M+3*M,
3267* prefer 2*M*M+2*M+2*M*NB)
3268* (RWorkspace: need M)
3269*
3270 CALL cgebrd( m, m, work( iu ), ldwrku, s,
3271 $ rwork( ie ), work( itauq ),
3272 $ work( itaup ), work( iwork ),
3273 $ lwork-iwork+1, ierr )
3274 CALL clacpy( 'L', m, m, work( iu ), ldwrku,
3275 $ work( ir ), ldwrkr )
3276*
3277* Generate right bidiagonalizing vectors in WORK(IU)
3278* (CWorkspace: need 2*M*M+3*M-1,
3279* prefer 2*M*M+2*M+(M-1)*NB)
3280* (RWorkspace: 0)
3281*
3282 CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3283 $ work( itaup ), work( iwork ),
3284 $ lwork-iwork+1, ierr )
3285*
3286* Generate left bidiagonalizing vectors in WORK(IR)
3287* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3288* (RWorkspace: 0)
3289*
3290 CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
3291 $ work( itauq ), work( iwork ),
3292 $ lwork-iwork+1, ierr )
3293 irwork = ie + m
3294*
3295* Perform bidiagonal QR iteration, computing left
3296* singular vectors of L in WORK(IR) and computing
3297* right singular vectors of L in WORK(IU)
3298* (CWorkspace: need 2*M*M)
3299* (RWorkspace: need BDSPAC)
3300*
3301 CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3302 $ work( iu ), ldwrku, work( ir ),
3303 $ ldwrkr, cdum, 1, rwork( irwork ),
3304 $ info )
3305*
3306* Multiply right singular vectors of L in WORK(IU) by
3307* Q in VT, storing result in A
3308* (CWorkspace: need M*M)
3309* (RWorkspace: 0)
3310*
3311 CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3312 $ ldwrku, vt, ldvt, czero, a, lda )
3313*
3314* Copy right singular vectors of A from A to VT
3315*
3316 CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3317*
3318* Copy left singular vectors of A from WORK(IR) to A
3319*
3320 CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
3321 $ lda )
3322*
3323 ELSE
3324*
3325* Insufficient workspace for a fast algorithm
3326*
3327 itau = 1
3328 iwork = itau + m
3329*
3330* Compute A=L*Q, copying result to VT
3331* (CWorkspace: need 2*M, prefer M+M*NB)
3332* (RWorkspace: 0)
3333*
3334 CALL cgelqf( m, n, a, lda, work( itau ),
3335 $ work( iwork ), lwork-iwork+1, ierr )
3336 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3337*
3338* Generate Q in VT
3339* (CWorkspace: need M+N, prefer M+N*NB)
3340* (RWorkspace: 0)
3341*
3342 CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3343 $ work( iwork ), lwork-iwork+1, ierr )
3344 ie = 1
3345 itauq = itau
3346 itaup = itauq + m
3347 iwork = itaup + m
3348*
3349* Zero out above L in A
3350*
3351 CALL claset( 'U', m-1, m-1, czero, czero,
3352 $ a( 1, 2 ), lda )
3353*
3354* Bidiagonalize L in A
3355* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3356* (RWorkspace: need M)
3357*
3358 CALL cgebrd( m, m, a, lda, s, rwork( ie ),
3359 $ work( itauq ), work( itaup ),
3360 $ work( iwork ), lwork-iwork+1, ierr )
3361*
3362* Multiply right bidiagonalizing vectors in A by Q
3363* in VT
3364* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3365* (RWorkspace: 0)
3366*
3367 CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3368 $ work( itaup ), vt, ldvt,
3369 $ work( iwork ), lwork-iwork+1, ierr )
3370*
3371* Generate left bidiagonalizing vectors in A
3372* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3373* (RWorkspace: 0)
3374*
3375 CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
3376 $ work( iwork ), lwork-iwork+1, ierr )
3377 irwork = ie + m
3378*
3379* Perform bidiagonal QR iteration, computing left
3380* singular vectors of A in A and computing right
3381* singular vectors of A in VT
3382* (CWorkspace: 0)
3383* (RWorkspace: need BDSPAC)
3384*
3385 CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3386 $ ldvt, a, lda, cdum, 1,
3387 $ rwork( irwork ), info )
3388*
3389 END IF
3390*
3391 ELSE IF( wntuas ) THEN
3392*
3393* Path 9t(N much larger than M, JOBU='S' or 'A',
3394* JOBVT='A')
3395* N right singular vectors to be computed in VT and
3396* M left singular vectors to be computed in U
3397*
3398 IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3399*
3400* Sufficient workspace for a fast algorithm
3401*
3402 iu = 1
3403 IF( lwork.GE.wrkbl+lda*m ) THEN
3404*
3405* WORK(IU) is LDA by M
3406*
3407 ldwrku = lda
3408 ELSE
3409*
3410* WORK(IU) is M by M
3411*
3412 ldwrku = m
3413 END IF
3414 itau = iu + ldwrku*m
3415 iwork = itau + m
3416*
3417* Compute A=L*Q, copying result to VT
3418* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3419* (RWorkspace: 0)
3420*
3421 CALL cgelqf( m, n, a, lda, work( itau ),
3422 $ work( iwork ), lwork-iwork+1, ierr )
3423 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3424*
3425* Generate Q in VT
3426* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3427* (RWorkspace: 0)
3428*
3429 CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3430 $ work( iwork ), lwork-iwork+1, ierr )
3431*
3432* Copy L to WORK(IU), zeroing out above it
3433*
3434 CALL clacpy( 'L', m, m, a, lda, work( iu ),
3435 $ ldwrku )
3436 CALL claset( 'U', m-1, m-1, czero, czero,
3437 $ work( iu+ldwrku ), ldwrku )
3438 ie = 1
3439 itauq = itau
3440 itaup = itauq + m
3441 iwork = itaup + m
3442*
3443* Bidiagonalize L in WORK(IU), copying result to U
3444* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3445* (RWorkspace: need M)
3446*
3447 CALL cgebrd( m, m, work( iu ), ldwrku, s,
3448 $ rwork( ie ), work( itauq ),
3449 $ work( itaup ), work( iwork ),
3450 $ lwork-iwork+1, ierr )
3451 CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
3452 $ ldu )
3453*
3454* Generate right bidiagonalizing vectors in WORK(IU)
3455* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3456* (RWorkspace: 0)
3457*
3458 CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
3459 $ work( itaup ), work( iwork ),
3460 $ lwork-iwork+1, ierr )
3461*
3462* Generate left bidiagonalizing vectors in U
3463* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3464* (RWorkspace: 0)
3465*
3466 CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3467 $ work( iwork ), lwork-iwork+1, ierr )
3468 irwork = ie + m
3469*
3470* Perform bidiagonal QR iteration, computing left
3471* singular vectors of L in U and computing right
3472* singular vectors of L in WORK(IU)
3473* (CWorkspace: need M*M)
3474* (RWorkspace: need BDSPAC)
3475*
3476 CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3477 $ work( iu ), ldwrku, u, ldu, cdum, 1,
3478 $ rwork( irwork ), info )
3479*
3480* Multiply right singular vectors of L in WORK(IU) by
3481* Q in VT, storing result in A
3482* (CWorkspace: need M*M)
3483* (RWorkspace: 0)
3484*
3485 CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
3486 $ ldwrku, vt, ldvt, czero, a, lda )
3487*
3488* Copy right singular vectors of A from A to VT
3489*
3490 CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
3491*
3492 ELSE
3493*
3494* Insufficient workspace for a fast algorithm
3495*
3496 itau = 1
3497 iwork = itau + m
3498*
3499* Compute A=L*Q, copying result to VT
3500* (CWorkspace: need 2*M, prefer M+M*NB)
3501* (RWorkspace: 0)
3502*
3503 CALL cgelqf( m, n, a, lda, work( itau ),
3504 $ work( iwork ), lwork-iwork+1, ierr )
3505 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3506*
3507* Generate Q in VT
3508* (CWorkspace: need M+N, prefer M+N*NB)
3509* (RWorkspace: 0)
3510*
3511 CALL cunglq( n, n, m, vt, ldvt, work( itau ),
3512 $ work( iwork ), lwork-iwork+1, ierr )
3513*
3514* Copy L to U, zeroing out above it
3515*
3516 CALL clacpy( 'L', m, m, a, lda, u, ldu )
3517 CALL claset( 'U', m-1, m-1, czero, czero,
3518 $ u( 1, 2 ), ldu )
3519 ie = 1
3520 itauq = itau
3521 itaup = itauq + m
3522 iwork = itaup + m
3523*
3524* Bidiagonalize L in U
3525* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3526* (RWorkspace: need M)
3527*
3528 CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
3529 $ work( itauq ), work( itaup ),
3530 $ work( iwork ), lwork-iwork+1, ierr )
3531*
3532* Multiply right bidiagonalizing vectors in U by Q
3533* in VT
3534* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3535* (RWorkspace: 0)
3536*
3537 CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3538 $ work( itaup ), vt, ldvt,
3539 $ work( iwork ), lwork-iwork+1, ierr )
3540*
3541* Generate left bidiagonalizing vectors in U
3542* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3543* (RWorkspace: 0)
3544*
3545 CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3546 $ work( iwork ), lwork-iwork+1, ierr )
3547 irwork = ie + m
3548*
3549* Perform bidiagonal QR iteration, computing left
3550* singular vectors of A in U and computing right
3551* singular vectors of A in VT
3552* (CWorkspace: 0)
3553* (RWorkspace: need BDSPAC)
3554*
3555 CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3556 $ ldvt, u, ldu, cdum, 1,
3557 $ rwork( irwork ), info )
3558*
3559 END IF
3560*
3561 END IF
3562*
3563 END IF
3564*
3565 ELSE
3566*
3567* N .LT. MNTHR
3568*
3569* Path 10t(N greater than M, but not much larger)
3570* Reduce to bidiagonal form without LQ decomposition
3571*
3572 ie = 1
3573 itauq = 1
3574 itaup = itauq + m
3575 iwork = itaup + m
3576*
3577* Bidiagonalize A
3578* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3579* (RWorkspace: M)
3580*
3581 CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
3582 $ work( itaup ), work( iwork ), lwork-iwork+1,
3583 $ ierr )
3584 IF( wntuas ) THEN
3585*
3586* If left singular vectors desired in U, copy result to U
3587* and generate left bidiagonalizing vectors in U
3588* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3589* (RWorkspace: 0)
3590*
3591 CALL clacpy( 'L', m, m, a, lda, u, ldu )
3592 CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
3593 $ work( iwork ), lwork-iwork+1, ierr )
3594 END IF
3595 IF( wntvas ) THEN
3596*
3597* If right singular vectors desired in VT, copy result to
3598* VT and generate right bidiagonalizing vectors in VT
3599* (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3600* (RWorkspace: 0)
3601*
3602 CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
3603 IF( wntva )
3604 $ nrvt = n
3605 IF( wntvs )
3606 $ nrvt = m
3607 CALL cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3608 $ work( iwork ), lwork-iwork+1, ierr )
3609 END IF
3610 IF( wntuo ) THEN
3611*
3612* If left singular vectors desired in A, generate left
3613* bidiagonalizing vectors in A
3614* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3615* (RWorkspace: 0)
3616*
3617 CALL cungbr( 'Q', m, m, n, a, lda, work( itauq ),
3618 $ work( iwork ), lwork-iwork+1, ierr )
3619 END IF
3620 IF( wntvo ) THEN
3621*
3622* If right singular vectors desired in A, generate right
3623* bidiagonalizing vectors in A
3624* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3625* (RWorkspace: 0)
3626*
3627 CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
3628 $ work( iwork ), lwork-iwork+1, ierr )
3629 END IF
3630 irwork = ie + m
3631 IF( wntuas .OR. wntuo )
3632 $ nru = m
3633 IF( wntun )
3634 $ nru = 0
3635 IF( wntvas .OR. wntvo )
3636 $ ncvt = n
3637 IF( wntvn )
3638 $ ncvt = 0
3639 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3640*
3641* Perform bidiagonal QR iteration, if desired, computing
3642* left singular vectors in U and computing right singular
3643* vectors in VT
3644* (CWorkspace: 0)
3645* (RWorkspace: need BDSPAC)
3646*
3647 CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3648 $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
3649 $ info )
3650 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3651*
3652* Perform bidiagonal QR iteration, if desired, computing
3653* left singular vectors in U and computing right singular
3654* vectors in A
3655* (CWorkspace: 0)
3656* (RWorkspace: need BDSPAC)
3657*
3658 CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
3659 $ lda, u, ldu, cdum, 1, rwork( irwork ),
3660 $ info )
3661 ELSE
3662*
3663* Perform bidiagonal QR iteration, if desired, computing
3664* left singular vectors in A and computing right singular
3665* vectors in VT
3666* (CWorkspace: 0)
3667* (RWorkspace: need BDSPAC)
3668*
3669 CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3670 $ ldvt, a, lda, cdum, 1, rwork( irwork ),
3671 $ info )
3672 END IF
3673*
3674 END IF
3675*
3676 END IF
3677*
3678* Undo scaling if necessary
3679*
3680 IF( iscl.EQ.1 ) THEN
3681 IF( anrm.GT.bignum )
3682 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3683 $ ierr )
3684 IF( info.NE.0 .AND. anrm.GT.bignum )
3685 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3686 $ rwork( ie ), minmn, ierr )
3687 IF( anrm.LT.smlnum )
3688 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3689 $ ierr )
3690 IF( info.NE.0 .AND. anrm.LT.smlnum )
3691 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3692 $ rwork( ie ), minmn, ierr )
3693 END IF
3694*
3695* Return optimal workspace in WORK(1)
3696*
3697 work( 1 ) = maxwrk
3698*
3699 RETURN
3700*
3701* End of CGESVD
3702*
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine cbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, rwork, info)
CBDSQR
Definition cbdsqr.f:222

◆ cgesvdq()

subroutine cgesvdq ( character joba,
character jobp,
character jobr,
character jobu,
character jobv,
integer m,
integer n,
complex, dimension( lda, * ) a,
integer lda,
real, dimension( * ) s,
complex, dimension( ldu, * ) u,
integer ldu,
complex, dimension( ldv, * ) v,
integer ldv,
integer numrank,
integer, dimension( * ) iwork,
integer liwork,
complex, dimension( * ) cwork,
integer lcwork,
real, dimension( * ) rwork,
integer lrwork,
integer info )

CGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method for GE matrices

Download CGESVDQ + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> CGESVDQ computes the singular value decomposition (SVD) of a complex
!> M-by-N matrix A, where M >= N. The SVD of A is written as
!>                                    [++]   [xx]   [x0]   [xx]
!>              A = U * SIGMA * V^*,  [++] = [xx] * [ox] * [xx]
!>                                    [++]   [xx]
!> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal
!> matrix, and V is an N-by-N unitary matrix. The diagonal elements
!> of SIGMA are the singular values of A. The columns of U and V are the
!> left and the right singular vectors of A, respectively.
!> 
Parameters
[in]JOBA
!>  JOBA is CHARACTER*1
!>  Specifies the level of accuracy in the computed SVD
!>  = 'A' The requested accuracy corresponds to having the backward
!>        error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F,
!>        where EPS = SLAMCH('Epsilon'). This authorises CGESVDQ to
!>        truncate the computed triangular factor in a rank revealing
!>        QR factorization whenever the truncated part is below the
!>        threshold of the order of EPS * ||A||_F. This is aggressive
!>        truncation level.
!>  = 'M' Similarly as with 'A', but the truncation is more gentle: it
!>        is allowed only when there is a drop on the diagonal of the
!>        triangular factor in the QR factorization. This is medium
!>        truncation level.
!>  = 'H' High accuracy requested. No numerical rank determination based
!>        on the rank revealing QR factorization is attempted.
!>  = 'E' Same as 'H', and in addition the condition number of column
!>        scaled A is estimated and returned in  RWORK(1).
!>        N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1)
!> 
[in]JOBP
!>  JOBP is CHARACTER*1
!>  = 'P' The rows of A are ordered in decreasing order with respect to
!>        ||A(i,:)||_\infty. This enhances numerical accuracy at the cost
!>        of extra data movement. Recommended for numerical robustness.
!>  = 'N' No row pivoting.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>          = 'T' After the initial pivoted QR factorization, CGESVD is applied to
!>          the adjoint R**H of the computed triangular factor R. This involves
!>          some extra data movement (matrix transpositions). Useful for
!>          experiments, research and development.
!>          = 'N' The triangular factor R is given as input to CGESVD. This may be
!>          preferred as it involves less data movement.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>          = 'A' All M left singular vectors are computed and returned in the
!>          matrix U. See the description of U.
!>          = 'S' or 'U' N = min(M,N) left singular vectors are computed and returned
!>          in the matrix U. See the description of U.
!>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular
!>          vectors are computed and returned in the matrix U.
!>          = 'F' The N left singular vectors are returned in factored form as the
!>          product of the Q factor from the initial QR factorization and the
!>          N left singular vectors of (R**H , 0)**H. If row pivoting is used,
!>          then the necessary information on the row pivoting is stored in
!>          IWORK(N+1:N+M-1).
!>          = 'N' The left singular vectors are not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>          = 'A', 'V' All N right singular vectors are computed and returned in
!>          the matrix V.
!>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular
!>          vectors are computed and returned in the matrix V. This option is
!>          allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal.
!>          = 'N' The right singular vectors are not computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  M >= N >= 0.
!> 
[in,out]A
!>          A is COMPLEX array of dimensions LDA x N
!>          On entry, the input matrix A.
!>          On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains
!>          the Householder vectors as stored by CGEQP3. If JOBU = 'F', these Householder
!>          vectors together with CWORK(1:N) can be used to restore the Q factors from
!>          the initial pivoted QR factorization of A. See the description of U.
!> 
[in]LDA
!>          LDA is INTEGER.
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is REAL array of dimension N.
!>          The singular values of A, ordered so that S(i) >= S(i+1).
!> 
[out]U
!>          U is COMPLEX array, dimension
!>          LDU x M if JOBU = 'A'; see the description of LDU. In this case,
!>          on exit, U contains the M left singular vectors.
!>          LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this
!>          case, U contains the leading N or the leading NUMRANK left singular vectors.
!>          LDU x N if JOBU = 'F' ; see the description of LDU. In this case U
!>          contains N x N unitary matrix that can be used to form the left
!>          singular vectors.
!>          If JOBU = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER.
!>          The leading dimension of the array U.
!>          If JOBU = 'A', 'S', 'U', 'R',  LDU >= max(1,M).
!>          If JOBU = 'F',                 LDU >= max(1,N).
!>          Otherwise,                     LDU >= 1.
!> 
[out]V
!>          V is COMPLEX array, dimension
!>          LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' .
!>          If JOBV = 'A', or 'V',  V contains the N-by-N unitary matrix  V**H;
!>          If JOBV = 'R', V contains the first NUMRANK rows of V**H (the right
!>          singular vectors, stored rowwise, of the NUMRANK largest singular values).
!>          If JOBV = 'N' and JOBA = 'E', V is used as a workspace.
!>          If JOBV = 'N', and JOBA.NE.'E', V is not referenced.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V.
!>          If JOBV = 'A', 'V', 'R',  or JOBA = 'E', LDV >= max(1,N).
!>          Otherwise,                               LDV >= 1.
!> 
[out]NUMRANK
!>          NUMRANK is INTEGER
!>          NUMRANK is the numerical rank first determined after the rank
!>          revealing QR factorization, following the strategy specified by the
!>          value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK
!>          leading singular values and vectors are then requested in the call
!>          of CGESVD. The final value of NUMRANK might be further reduced if
!>          some singular values are computed as zeros.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (max(1, LIWORK)).
!>          On exit, IWORK(1:N) contains column pivoting permutation of the
!>          rank revealing QR factorization.
!>          If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence
!>          of row swaps used in row pivoting. These can be used to restore the
!>          left singular vectors in the case JOBU = 'F'.
!>
!>          If LIWORK, LCWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          IWORK(1) returns the minimal LIWORK.
!> 
[in]LIWORK
!>          LIWORK is INTEGER
!>          The dimension of the array IWORK.
!>          LIWORK >= N + M - 1,  if JOBP = 'P';
!>          LIWORK >= N           if JOBP = 'N'.
!>
!>          If LIWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the CWORK, IWORK, and RWORK arrays, and no error
!>          message related to LCWORK is issued by XERBLA.
!> 
[out]CWORK
!>          CWORK is COMPLEX array, dimension (max(2, LCWORK)), used as a workspace.
!>          On exit, if, on entry, LCWORK.NE.-1, CWORK(1:N) contains parameters
!>          needed to recover the Q factor from the QR factorization computed by
!>          CGEQP3.
!>
!>          If LIWORK, LCWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          CWORK(1) returns the optimal LCWORK, and
!>          CWORK(2) returns the minimal LCWORK.
!> 
[in,out]LCWORK
!>          LCWORK is INTEGER
!>          The dimension of the array CWORK. It is determined as follows:
!>          Let  LWQP3 = N+1,  LWCON = 2*N, and let
!>          LWUNQ = { MAX( N, 1 ),  if JOBU = 'R', 'S', or 'U'
!>                  { MAX( M, 1 ),  if JOBU = 'A'
!>          LWSVD = MAX( 3*N, 1 )
!>          LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 3*(N/2), 1 ), LWUNLQ = MAX( N, 1 ),
!>          LWQRF = MAX( N/2, 1 ), LWUNQ2 = MAX( N, 1 )
!>          Then the minimal value of LCWORK is:
!>          = MAX( N + LWQP3, LWSVD )        if only the singular values are needed;
!>          = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed,
!>                                   and a scaled condition estimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD, LWUNQ ) if the singular values and the left
!>                                   singular vectors are requested;
!>          = N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ) if the singular values and the left
!>                                   singular vectors are requested, and also
!>                                   a scaled condition estimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD )        if the singular values and the right
!>                                   singular vectors are requested;
!>          = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right
!>                                   singular vectors are requested, and also
!>                                   a scaled condition etimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD, LWUNQ ) if the full SVD is requested with JOBV = 'R';
!>                                   independent of JOBR;
!>          = N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ) if the full SVD is requested,
!>                                   JOBV = 'R' and, also a scaled condition
!>                                   estimate requested; independent of JOBR;
!>          = MAX( N + MAX( LWQP3, LWSVD, LWUNQ ),
!>         N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWUNLQ, LWUNQ) ) if the
!>                         full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='N'
!>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ),
!>         N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWUNLQ, LWUNQ ) )
!>                         if the full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='N', and also a scaled condition number estimate
!>                         requested.
!>          = MAX( N + MAX( LWQP3, LWSVD, LWUNQ ),
!>         N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWUNQ2, LWUNQ ) ) if the
!>                         full SVD is requested with JOBV = 'A', 'V', and JOBR ='T'
!>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ),
!>         N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWUNQ2, LWUNQ ) )
!>                         if the full SVD is requested with JOBV = 'A', 'V' and
!>                         JOBR ='T', and also a scaled condition number estimate
!>                         requested.
!>          Finally, LCWORK must be at least two: LCWORK = MAX( 2, LCWORK ).
!>
!>          If LCWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the CWORK, IWORK, and RWORK arrays, and no error
!>          message related to LCWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is REAL array, dimension (max(1, LRWORK)).
!>          On exit,
!>          1. If JOBA = 'E', RWORK(1) contains an estimate of the condition
!>          number of column scaled A. If A = C * D where D is diagonal and C
!>          has unit columns in the Euclidean norm, then, assuming full column rank,
!>          N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1).
!>          Otherwise, RWORK(1) = -1.
!>          2. RWORK(2) contains the number of singular values computed as
!>          exact zeros in CGESVD applied to the upper triangular or trapezoidal
!>          R (from the initial QR factorization). In case of early exit (no call to
!>          CGESVD, such as in the case of zero matrix) RWORK(2) = -1.
!>
!>          If LIWORK, LCWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          RWORK(1) returns the minimal LRWORK.
!> 
[in]LRWORK
!>          LRWORK is INTEGER.
!>          The dimension of the array RWORK.
!>          If JOBP ='P', then LRWORK >= MAX(2, M, 5*N);
!>          Otherwise, LRWORK >= MAX(2, 5*N).
!>
!>          If LRWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the CWORK, IWORK, and RWORK arrays, and no error
!>          message related to LCWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if CBDSQR did not converge, INFO specifies how many superdiagonals
!>          of an intermediate bidiagonal form B (computed in CGESVD) did not
!>          converge to zero.
!> 
Further Details:
!>
!>   1. The data movement (matrix transpose) is coded using simple nested
!>   DO-loops because BLAS and LAPACK do not provide corresponding subroutines.
!>   Those DO-loops are easily identified in this source code - by the CONTINUE
!>   statements labeled with 11**. In an optimized version of this code, the
!>   nested DO loops should be replaced with calls to an optimized subroutine.
!>   2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause
!>   column norm overflow. This is the minial precaution and it is left to the
!>   SVD routine (CGESVD) to do its own preemptive scaling if potential over-
!>   or underflows are detected. To avoid repeated scanning of the array A,
!>   an optimal implementation would do all necessary scaling before calling
!>   CGESVD and the scaling in CGESVD can be switched off.
!>   3. Other comments related to code optimization are given in comments in the
!>   code, enlosed in [[double brackets]].
!> 
Bugs, examples and comments
!>  Please report all bugs and send interesting examples and/or comments to
!>  drmac@math.hr. Thank you.
!> 
References
!>  [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for
!>      Computing the SVD with High Accuracy. ACM Trans. Math. Softw.
!>      44(1): 11:1-11:30 (2017)
!>
!>  SIGMA library, xGESVDQ section updated February 2016.
!>  Developed and coded by Zlatko Drmac, Department of Mathematics
!>  University of Zagreb, Croatia, drmac@math.hr
!> 
Contributors:
!> Developed and coded by Zlatko Drmac, Department of Mathematics
!>  University of Zagreb, Croatia, drmac@math.hr
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 410 of file cgesvdq.f.

413* .. Scalar Arguments ..
414 IMPLICIT NONE
415 CHARACTER JOBA, JOBP, JOBR, JOBU, JOBV
416 INTEGER M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LCWORK, LRWORK,
417 $ INFO
418* ..
419* .. Array Arguments ..
420 COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( * )
421 REAL S( * ), RWORK( * )
422 INTEGER IWORK( * )
423*
424* =====================================================================
425*
426* .. Parameters ..
427 REAL ZERO, ONE
428 parameter( zero = 0.0e0, one = 1.0e0 )
429 COMPLEX CZERO, CONE
430 parameter( czero = ( 0.0e0, 0.0e0 ), cone = ( 1.0e0, 0.0e0 ) )
431* ..
432* .. Local Scalars ..
433 INTEGER IERR, NR, N1, OPTRATIO, p, q
434 INTEGER LWCON, LWQP3, LWRK_CGELQF, LWRK_CGESVD, LWRK_CGESVD2,
435 $ LWRK_CGEQP3, LWRK_CGEQRF, LWRK_CUNMLQ, LWRK_CUNMQR,
436 $ LWRK_CUNMQR2, LWLQF, LWQRF, LWSVD, LWSVD2, LWUNQ,
437 $ LWUNQ2, LWUNLQ, MINWRK, MINWRK2, OPTWRK, OPTWRK2,
438 $ IMINWRK, RMINWRK
439 LOGICAL ACCLA, ACCLM, ACCLH, ASCALED, CONDA, DNTWU, DNTWV,
440 $ LQUERY, LSVC0, LSVEC, ROWPRM, RSVEC, RTRANS, WNTUA,
441 $ WNTUF, WNTUR, WNTUS, WNTVA, WNTVR
442 REAL BIG, EPSLN, RTMP, SCONDA, SFMIN
443 COMPLEX CTMP
444* ..
445* .. Local Arrays
446 COMPLEX CDUMMY(1)
447 REAL RDUMMY(1)
448* ..
449* .. External Subroutines (BLAS, LAPACK)
450 EXTERNAL cgelqf, cgeqp3, cgeqrf, cgesvd, clacpy, clapmt,
453* ..
454* .. External Functions (BLAS, LAPACK)
455 LOGICAL LSAME
456 INTEGER ISAMAX
457 REAL CLANGE, SCNRM2, SLAMCH
458 EXTERNAL clange, lsame, isamax, scnrm2, slamch
459* ..
460* .. Intrinsic Functions ..
461 INTRINSIC abs, conjg, max, min, real, sqrt
462* ..
463* .. Executable Statements ..
464*
465* Test the input arguments
466*
467 wntus = lsame( jobu, 'S' ) .OR. lsame( jobu, 'U' )
468 wntur = lsame( jobu, 'R' )
469 wntua = lsame( jobu, 'A' )
470 wntuf = lsame( jobu, 'F' )
471 lsvc0 = wntus .OR. wntur .OR. wntua
472 lsvec = lsvc0 .OR. wntuf
473 dntwu = lsame( jobu, 'N' )
474*
475 wntvr = lsame( jobv, 'R' )
476 wntva = lsame( jobv, 'A' ) .OR. lsame( jobv, 'V' )
477 rsvec = wntvr .OR. wntva
478 dntwv = lsame( jobv, 'N' )
479*
480 accla = lsame( joba, 'A' )
481 acclm = lsame( joba, 'M' )
482 conda = lsame( joba, 'E' )
483 acclh = lsame( joba, 'H' ) .OR. conda
484*
485 rowprm = lsame( jobp, 'P' )
486 rtrans = lsame( jobr, 'T' )
487*
488 IF ( rowprm ) THEN
489 iminwrk = max( 1, n + m - 1 )
490 rminwrk = max( 2, m, 5*n )
491 ELSE
492 iminwrk = max( 1, n )
493 rminwrk = max( 2, 5*n )
494 END IF
495 lquery = (liwork .EQ. -1 .OR. lcwork .EQ. -1 .OR. lrwork .EQ. -1)
496 info = 0
497 IF ( .NOT. ( accla .OR. acclm .OR. acclh ) ) THEN
498 info = -1
499 ELSE IF ( .NOT.( rowprm .OR. lsame( jobp, 'N' ) ) ) THEN
500 info = -2
501 ELSE IF ( .NOT.( rtrans .OR. lsame( jobr, 'N' ) ) ) THEN
502 info = -3
503 ELSE IF ( .NOT.( lsvec .OR. dntwu ) ) THEN
504 info = -4
505 ELSE IF ( wntur .AND. wntva ) THEN
506 info = -5
507 ELSE IF ( .NOT.( rsvec .OR. dntwv )) THEN
508 info = -5
509 ELSE IF ( m.LT.0 ) THEN
510 info = -6
511 ELSE IF ( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
512 info = -7
513 ELSE IF ( lda.LT.max( 1, m ) ) THEN
514 info = -9
515 ELSE IF ( ldu.LT.1 .OR. ( lsvc0 .AND. ldu.LT.m ) .OR.
516 $ ( wntuf .AND. ldu.LT.n ) ) THEN
517 info = -12
518 ELSE IF ( ldv.LT.1 .OR. ( rsvec .AND. ldv.LT.n ) .OR.
519 $ ( conda .AND. ldv.LT.n ) ) THEN
520 info = -14
521 ELSE IF ( liwork .LT. iminwrk .AND. .NOT. lquery ) THEN
522 info = -17
523 END IF
524*
525*
526 IF ( info .EQ. 0 ) THEN
527*
528* Compute workspace
529* .. compute the minimal and the optimal workspace lengths
530* [[The expressions for computing the minimal and the optimal
531* values of LCWORK are written with a lot of redundancy and
532* can be simplified. However, this detailed form is easier for
533* maintenance and modifications of the code.]]
534*
535* .. minimal workspace length for CGEQP3 of an M x N matrix
536 lwqp3 = n+1
537* .. minimal workspace length for CUNMQR to build left singular vectors
538 IF ( wntus .OR. wntur ) THEN
539 lwunq = max( n , 1 )
540 ELSE IF ( wntua ) THEN
541 lwunq = max( m , 1 )
542 END IF
543* .. minimal workspace length for CPOCON of an N x N matrix
544 lwcon = 2 * n
545* .. CGESVD of an N x N matrix
546 lwsvd = max( 3 * n, 1 )
547 IF ( lquery ) THEN
548 CALL cgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,
549 $ rdummy, ierr )
550 lwrk_cgeqp3 = int( cdummy(1) )
551 IF ( wntus .OR. wntur ) THEN
552 CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
553 $ ldu, cdummy, -1, ierr )
554 lwrk_cunmqr = int( cdummy(1) )
555 ELSE IF ( wntua ) THEN
556 CALL cunmqr( 'L', 'N', m, m, n, a, lda, cdummy, u,
557 $ ldu, cdummy, -1, ierr )
558 lwrk_cunmqr = int( cdummy(1) )
559 ELSE
560 lwrk_cunmqr = 0
561 END IF
562 END IF
563 minwrk = 2
564 optwrk = 2
565 IF ( .NOT. (lsvec .OR. rsvec )) THEN
566* .. minimal and optimal sizes of the complex workspace if
567* only the singular values are requested
568 IF ( conda ) THEN
569 minwrk = max( n+lwqp3, lwcon, lwsvd )
570 ELSE
571 minwrk = max( n+lwqp3, lwsvd )
572 END IF
573 IF ( lquery ) THEN
574 CALL cgesvd( 'N', 'N', n, n, a, lda, s, u, ldu,
575 $ v, ldv, cdummy, -1, rdummy, ierr )
576 lwrk_cgesvd = int( cdummy(1) )
577 IF ( conda ) THEN
578 optwrk = max( n+lwrk_cgeqp3, n+lwcon, lwrk_cgesvd )
579 ELSE
580 optwrk = max( n+lwrk_cgeqp3, lwrk_cgesvd )
581 END IF
582 END IF
583 ELSE IF ( lsvec .AND. (.NOT.rsvec) ) THEN
584* .. minimal and optimal sizes of the complex workspace if the
585* singular values and the left singular vectors are requested
586 IF ( conda ) THEN
587 minwrk = n + max( lwqp3, lwcon, lwsvd, lwunq )
588 ELSE
589 minwrk = n + max( lwqp3, lwsvd, lwunq )
590 END IF
591 IF ( lquery ) THEN
592 IF ( rtrans ) THEN
593 CALL cgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,
594 $ v, ldv, cdummy, -1, rdummy, ierr )
595 ELSE
596 CALL cgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,
597 $ v, ldv, cdummy, -1, rdummy, ierr )
598 END IF
599 lwrk_cgesvd = int( cdummy(1) )
600 IF ( conda ) THEN
601 optwrk = n + max( lwrk_cgeqp3, lwcon, lwrk_cgesvd,
602 $ lwrk_cunmqr )
603 ELSE
604 optwrk = n + max( lwrk_cgeqp3, lwrk_cgesvd,
605 $ lwrk_cunmqr )
606 END IF
607 END IF
608 ELSE IF ( rsvec .AND. (.NOT.lsvec) ) THEN
609* .. minimal and optimal sizes of the complex workspace if the
610* singular values and the right singular vectors are requested
611 IF ( conda ) THEN
612 minwrk = n + max( lwqp3, lwcon, lwsvd )
613 ELSE
614 minwrk = n + max( lwqp3, lwsvd )
615 END IF
616 IF ( lquery ) THEN
617 IF ( rtrans ) THEN
618 CALL cgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,
619 $ v, ldv, cdummy, -1, rdummy, ierr )
620 ELSE
621 CALL cgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,
622 $ v, ldv, cdummy, -1, rdummy, ierr )
623 END IF
624 lwrk_cgesvd = int( cdummy(1) )
625 IF ( conda ) THEN
626 optwrk = n + max( lwrk_cgeqp3, lwcon, lwrk_cgesvd )
627 ELSE
628 optwrk = n + max( lwrk_cgeqp3, lwrk_cgesvd )
629 END IF
630 END IF
631 ELSE
632* .. minimal and optimal sizes of the complex workspace if the
633* full SVD is requested
634 IF ( rtrans ) THEN
635 minwrk = max( lwqp3, lwsvd, lwunq )
636 IF ( conda ) minwrk = max( minwrk, lwcon )
637 minwrk = minwrk + n
638 IF ( wntva ) THEN
639* .. minimal workspace length for N x N/2 CGEQRF
640 lwqrf = max( n/2, 1 )
641* .. minimal workspace length for N/2 x N/2 CGESVD
642 lwsvd2 = max( 3 * (n/2), 1 )
643 lwunq2 = max( n, 1 )
644 minwrk2 = max( lwqp3, n/2+lwqrf, n/2+lwsvd2,
645 $ n/2+lwunq2, lwunq )
646 IF ( conda ) minwrk2 = max( minwrk2, lwcon )
647 minwrk2 = n + minwrk2
648 minwrk = max( minwrk, minwrk2 )
649 END IF
650 ELSE
651 minwrk = max( lwqp3, lwsvd, lwunq )
652 IF ( conda ) minwrk = max( minwrk, lwcon )
653 minwrk = minwrk + n
654 IF ( wntva ) THEN
655* .. minimal workspace length for N/2 x N CGELQF
656 lwlqf = max( n/2, 1 )
657 lwsvd2 = max( 3 * (n/2), 1 )
658 lwunlq = max( n , 1 )
659 minwrk2 = max( lwqp3, n/2+lwlqf, n/2+lwsvd2,
660 $ n/2+lwunlq, lwunq )
661 IF ( conda ) minwrk2 = max( minwrk2, lwcon )
662 minwrk2 = n + minwrk2
663 minwrk = max( minwrk, minwrk2 )
664 END IF
665 END IF
666 IF ( lquery ) THEN
667 IF ( rtrans ) THEN
668 CALL cgesvd( 'O', 'A', n, n, a, lda, s, u, ldu,
669 $ v, ldv, cdummy, -1, rdummy, ierr )
670 lwrk_cgesvd = int( cdummy(1) )
671 optwrk = max(lwrk_cgeqp3,lwrk_cgesvd,lwrk_cunmqr)
672 IF ( conda ) optwrk = max( optwrk, lwcon )
673 optwrk = n + optwrk
674 IF ( wntva ) THEN
675 CALL cgeqrf(n,n/2,u,ldu,cdummy,cdummy,-1,ierr)
676 lwrk_cgeqrf = int( cdummy(1) )
677 CALL cgesvd( 'S', 'O', n/2,n/2, v,ldv, s, u,ldu,
678 $ v, ldv, cdummy, -1, rdummy, ierr )
679 lwrk_cgesvd2 = int( cdummy(1) )
680 CALL cunmqr( 'R', 'C', n, n, n/2, u, ldu, cdummy,
681 $ v, ldv, cdummy, -1, ierr )
682 lwrk_cunmqr2 = int( cdummy(1) )
683 optwrk2 = max( lwrk_cgeqp3, n/2+lwrk_cgeqrf,
684 $ n/2+lwrk_cgesvd2, n/2+lwrk_cunmqr2 )
685 IF ( conda ) optwrk2 = max( optwrk2, lwcon )
686 optwrk2 = n + optwrk2
687 optwrk = max( optwrk, optwrk2 )
688 END IF
689 ELSE
690 CALL cgesvd( 'S', 'O', n, n, a, lda, s, u, ldu,
691 $ v, ldv, cdummy, -1, rdummy, ierr )
692 lwrk_cgesvd = int( cdummy(1) )
693 optwrk = max(lwrk_cgeqp3,lwrk_cgesvd,lwrk_cunmqr)
694 IF ( conda ) optwrk = max( optwrk, lwcon )
695 optwrk = n + optwrk
696 IF ( wntva ) THEN
697 CALL cgelqf(n/2,n,u,ldu,cdummy,cdummy,-1,ierr)
698 lwrk_cgelqf = int( cdummy(1) )
699 CALL cgesvd( 'S','O', n/2,n/2, v, ldv, s, u, ldu,
700 $ v, ldv, cdummy, -1, rdummy, ierr )
701 lwrk_cgesvd2 = int( cdummy(1) )
702 CALL cunmlq( 'R', 'N', n, n, n/2, u, ldu, cdummy,
703 $ v, ldv, cdummy,-1,ierr )
704 lwrk_cunmlq = int( cdummy(1) )
705 optwrk2 = max( lwrk_cgeqp3, n/2+lwrk_cgelqf,
706 $ n/2+lwrk_cgesvd2, n/2+lwrk_cunmlq )
707 IF ( conda ) optwrk2 = max( optwrk2, lwcon )
708 optwrk2 = n + optwrk2
709 optwrk = max( optwrk, optwrk2 )
710 END IF
711 END IF
712 END IF
713 END IF
714*
715 minwrk = max( 2, minwrk )
716 optwrk = max( 2, optwrk )
717 IF ( lcwork .LT. minwrk .AND. (.NOT.lquery) ) info = -19
718*
719 END IF
720*
721 IF (info .EQ. 0 .AND. lrwork .LT. rminwrk .AND. .NOT. lquery) THEN
722 info = -21
723 END IF
724 IF( info.NE.0 ) THEN
725 CALL xerbla( 'CGESVDQ', -info )
726 RETURN
727 ELSE IF ( lquery ) THEN
728*
729* Return optimal workspace
730*
731 iwork(1) = iminwrk
732 cwork(1) = optwrk
733 cwork(2) = minwrk
734 rwork(1) = rminwrk
735 RETURN
736 END IF
737*
738* Quick return if the matrix is void.
739*
740 IF( ( m.EQ.0 ) .OR. ( n.EQ.0 ) ) THEN
741* .. all output is void.
742 RETURN
743 END IF
744*
745 big = slamch('O')
746 ascaled = .false.
747 IF ( rowprm ) THEN
748* .. reordering the rows in decreasing sequence in the
749* ell-infinity norm - this enhances numerical robustness in
750* the case of differently scaled rows.
751 DO 1904 p = 1, m
752* RWORK(p) = ABS( A(p,ICAMAX(N,A(p,1),LDA)) )
753* [[CLANGE will return NaN if an entry of the p-th row is Nan]]
754 rwork(p) = clange( 'M', 1, n, a(p,1), lda, rdummy )
755* .. check for NaN's and Inf's
756 IF ( ( rwork(p) .NE. rwork(p) ) .OR.
757 $ ( (rwork(p)*zero) .NE. zero ) ) THEN
758 info = - 8
759 CALL xerbla( 'CGESVDQ', -info )
760 RETURN
761 END IF
762 1904 CONTINUE
763 DO 1952 p = 1, m - 1
764 q = isamax( m-p+1, rwork(p), 1 ) + p - 1
765 iwork(n+p) = q
766 IF ( p .NE. q ) THEN
767 rtmp = rwork(p)
768 rwork(p) = rwork(q)
769 rwork(q) = rtmp
770 END IF
771 1952 CONTINUE
772*
773 IF ( rwork(1) .EQ. zero ) THEN
774* Quick return: A is the M x N zero matrix.
775 numrank = 0
776 CALL slaset( 'G', n, 1, zero, zero, s, n )
777 IF ( wntus ) CALL claset('G', m, n, czero, cone, u, ldu)
778 IF ( wntua ) CALL claset('G', m, m, czero, cone, u, ldu)
779 IF ( wntva ) CALL claset('G', n, n, czero, cone, v, ldv)
780 IF ( wntuf ) THEN
781 CALL claset( 'G', n, 1, czero, czero, cwork, n )
782 CALL claset( 'G', m, n, czero, cone, u, ldu )
783 END IF
784 DO 5001 p = 1, n
785 iwork(p) = p
786 5001 CONTINUE
787 IF ( rowprm ) THEN
788 DO 5002 p = n + 1, n + m - 1
789 iwork(p) = p - n
790 5002 CONTINUE
791 END IF
792 IF ( conda ) rwork(1) = -1
793 rwork(2) = -1
794 RETURN
795 END IF
796*
797 IF ( rwork(1) .GT. big / sqrt(real(m)) ) THEN
798* .. to prevent overflow in the QR factorization, scale the
799* matrix by 1/sqrt(M) if too large entry detected
800 CALL clascl('G',0,0,sqrt(real(m)),one, m,n, a,lda, ierr)
801 ascaled = .true.
802 END IF
803 CALL claswp( n, a, lda, 1, m-1, iwork(n+1), 1 )
804 END IF
805*
806* .. At this stage, preemptive scaling is done only to avoid column
807* norms overflows during the QR factorization. The SVD procedure should
808* have its own scaling to save the singular values from overflows and
809* underflows. That depends on the SVD procedure.
810*
811 IF ( .NOT.rowprm ) THEN
812 rtmp = clange( 'M', m, n, a, lda, rwork )
813 IF ( ( rtmp .NE. rtmp ) .OR.
814 $ ( (rtmp*zero) .NE. zero ) ) THEN
815 info = - 8
816 CALL xerbla( 'CGESVDQ', -info )
817 RETURN
818 END IF
819 IF ( rtmp .GT. big / sqrt(real(m)) ) THEN
820* .. to prevent overflow in the QR factorization, scale the
821* matrix by 1/sqrt(M) if too large entry detected
822 CALL clascl('G',0,0, sqrt(real(m)),one, m,n, a,lda, ierr)
823 ascaled = .true.
824 END IF
825 END IF
826*
827* .. QR factorization with column pivoting
828*
829* A * P = Q * [ R ]
830* [ 0 ]
831*
832 DO 1963 p = 1, n
833* .. all columns are free columns
834 iwork(p) = 0
835 1963 CONTINUE
836 CALL cgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lcwork-n,
837 $ rwork, ierr )
838*
839* If the user requested accuracy level allows truncation in the
840* computed upper triangular factor, the matrix R is examined and,
841* if possible, replaced with its leading upper trapezoidal part.
842*
843 epsln = slamch('E')
844 sfmin = slamch('S')
845* SMALL = SFMIN / EPSLN
846 nr = n
847*
848 IF ( accla ) THEN
849*
850* Standard absolute error bound suffices. All sigma_i with
851* sigma_i < N*EPS*||A||_F are flushed to zero. This is an
852* aggressive enforcement of lower numerical rank by introducing a
853* backward error of the order of N*EPS*||A||_F.
854 nr = 1
855 rtmp = sqrt(real(n))*epsln
856 DO 3001 p = 2, n
857 IF ( abs(a(p,p)) .LT. (rtmp*abs(a(1,1))) ) GO TO 3002
858 nr = nr + 1
859 3001 CONTINUE
860 3002 CONTINUE
861*
862 ELSEIF ( acclm ) THEN
863* .. similarly as above, only slightly more gentle (less aggressive).
864* Sudden drop on the diagonal of R is used as the criterion for being
865* close-to-rank-deficient. The threshold is set to EPSLN=SLAMCH('E').
866* [[This can be made more flexible by replacing this hard-coded value
867* with a user specified threshold.]] Also, the values that underflow
868* will be truncated.
869 nr = 1
870 DO 3401 p = 2, n
871 IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
872 $ ( abs(a(p,p)) .LT. sfmin ) ) GO TO 3402
873 nr = nr + 1
874 3401 CONTINUE
875 3402 CONTINUE
876*
877 ELSE
878* .. RRQR not authorized to determine numerical rank except in the
879* obvious case of zero pivots.
880* .. inspect R for exact zeros on the diagonal;
881* R(i,i)=0 => R(i:N,i:N)=0.
882 nr = 1
883 DO 3501 p = 2, n
884 IF ( abs(a(p,p)) .EQ. zero ) GO TO 3502
885 nr = nr + 1
886 3501 CONTINUE
887 3502 CONTINUE
888*
889 IF ( conda ) THEN
890* Estimate the scaled condition number of A. Use the fact that it is
891* the same as the scaled condition number of R.
892* .. V is used as workspace
893 CALL clacpy( 'U', n, n, a, lda, v, ldv )
894* Only the leading NR x NR submatrix of the triangular factor
895* is considered. Only if NR=N will this give a reliable error
896* bound. However, even for NR < N, this can be used on an
897* expert level and obtain useful information in the sense of
898* perturbation theory.
899 DO 3053 p = 1, nr
900 rtmp = scnrm2( p, v(1,p), 1 )
901 CALL csscal( p, one/rtmp, v(1,p), 1 )
902 3053 CONTINUE
903 IF ( .NOT. ( lsvec .OR. rsvec ) ) THEN
904 CALL cpocon( 'U', nr, v, ldv, one, rtmp,
905 $ cwork, rwork, ierr )
906 ELSE
907 CALL cpocon( 'U', nr, v, ldv, one, rtmp,
908 $ cwork(n+1), rwork, ierr )
909 END IF
910 sconda = one / sqrt(rtmp)
911* For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1),
912* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
913* See the reference [1] for more details.
914 END IF
915*
916 ENDIF
917*
918 IF ( wntur ) THEN
919 n1 = nr
920 ELSE IF ( wntus .OR. wntuf) THEN
921 n1 = n
922 ELSE IF ( wntua ) THEN
923 n1 = m
924 END IF
925*
926 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
927*.......................................................................
928* .. only the singular values are requested
929*.......................................................................
930 IF ( rtrans ) THEN
931*
932* .. compute the singular values of R**H = [A](1:NR,1:N)**H
933* .. set the lower triangle of [A] to [A](1:NR,1:N)**H and
934* the upper triangle of [A] to zero.
935 DO 1146 p = 1, min( n, nr )
936 a(p,p) = conjg(a(p,p))
937 DO 1147 q = p + 1, n
938 a(q,p) = conjg(a(p,q))
939 IF ( q .LE. nr ) a(p,q) = czero
940 1147 CONTINUE
941 1146 CONTINUE
942*
943 CALL cgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,
944 $ v, ldv, cwork, lcwork, rwork, info )
945*
946 ELSE
947*
948* .. compute the singular values of R = [A](1:NR,1:N)
949*
950 IF ( nr .GT. 1 )
951 $ CALL claset( 'L', nr-1,nr-1, czero,czero, a(2,1), lda )
952 CALL cgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,
953 $ v, ldv, cwork, lcwork, rwork, info )
954*
955 END IF
956*
957 ELSE IF ( lsvec .AND. ( .NOT. rsvec) ) THEN
958*.......................................................................
959* .. the singular values and the left singular vectors requested
960*.......................................................................""""""""
961 IF ( rtrans ) THEN
962* .. apply CGESVD to R**H
963* .. copy R**H into [U] and overwrite [U] with the right singular
964* vectors of R
965 DO 1192 p = 1, nr
966 DO 1193 q = p, n
967 u(q,p) = conjg(a(p,q))
968 1193 CONTINUE
969 1192 CONTINUE
970 IF ( nr .GT. 1 )
971 $ CALL claset( 'U', nr-1,nr-1, czero,czero, u(1,2), ldu )
972* .. the left singular vectors not computed, the NR right singular
973* vectors overwrite [U](1:NR,1:NR) as conjugate transposed. These
974* will be pre-multiplied by Q to build the left singular vectors of A.
975 CALL cgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,
976 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
977*
978 DO 1119 p = 1, nr
979 u(p,p) = conjg(u(p,p))
980 DO 1120 q = p + 1, nr
981 ctmp = conjg(u(q,p))
982 u(q,p) = conjg(u(p,q))
983 u(p,q) = ctmp
984 1120 CONTINUE
985 1119 CONTINUE
986*
987 ELSE
988* .. apply CGESVD to R
989* .. copy R into [U] and overwrite [U] with the left singular vectors
990 CALL clacpy( 'U', nr, n, a, lda, u, ldu )
991 IF ( nr .GT. 1 )
992 $ CALL claset( 'L', nr-1, nr-1, czero, czero, u(2,1), ldu )
993* .. the right singular vectors not computed, the NR left singular
994* vectors overwrite [U](1:NR,1:NR)
995 CALL cgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,
996 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
997* .. now [U](1:NR,1:NR) contains the NR left singular vectors of
998* R. These will be pre-multiplied by Q to build the left singular
999* vectors of A.
1000 END IF
1001*
1002* .. assemble the left singular vector matrix U of dimensions
1003* (M x NR) or (M x N) or (M x M).
1004 IF ( ( nr .LT. m ) .AND. ( .NOT.wntuf ) ) THEN
1005 CALL claset('A', m-nr, nr, czero, czero, u(nr+1,1), ldu)
1006 IF ( nr .LT. n1 ) THEN
1007 CALL claset( 'A',nr,n1-nr,czero,czero,u(1,nr+1), ldu )
1008 CALL claset( 'A',m-nr,n1-nr,czero,cone,
1009 $ u(nr+1,nr+1), ldu )
1010 END IF
1011 END IF
1012*
1013* The Q matrix from the first QRF is built into the left singular
1014* vectors matrix U.
1015*
1016 IF ( .NOT.wntuf )
1017 $ CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1018 $ ldu, cwork(n+1), lcwork-n, ierr )
1019 IF ( rowprm .AND. .NOT.wntuf )
1020 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(n+1), -1 )
1021*
1022 ELSE IF ( rsvec .AND. ( .NOT. lsvec ) ) THEN
1023*.......................................................................
1024* .. the singular values and the right singular vectors requested
1025*.......................................................................
1026 IF ( rtrans ) THEN
1027* .. apply CGESVD to R**H
1028* .. copy R**H into V and overwrite V with the left singular vectors
1029 DO 1165 p = 1, nr
1030 DO 1166 q = p, n
1031 v(q,p) = conjg(a(p,q))
1032 1166 CONTINUE
1033 1165 CONTINUE
1034 IF ( nr .GT. 1 )
1035 $ CALL claset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv )
1036* .. the left singular vectors of R**H overwrite V, the right singular
1037* vectors not computed
1038 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1039 CALL cgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,
1040 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
1041*
1042 DO 1121 p = 1, nr
1043 v(p,p) = conjg(v(p,p))
1044 DO 1122 q = p + 1, nr
1045 ctmp = conjg(v(q,p))
1046 v(q,p) = conjg(v(p,q))
1047 v(p,q) = ctmp
1048 1122 CONTINUE
1049 1121 CONTINUE
1050*
1051 IF ( nr .LT. n ) THEN
1052 DO 1103 p = 1, nr
1053 DO 1104 q = nr + 1, n
1054 v(p,q) = conjg(v(q,p))
1055 1104 CONTINUE
1056 1103 CONTINUE
1057 END IF
1058 CALL clapmt( .false., nr, n, v, ldv, iwork )
1059 ELSE
1060* .. need all N right singular vectors and NR < N
1061* [!] This is simple implementation that augments [V](1:N,1:NR)
1062* by padding a zero block. In the case NR << N, a more efficient
1063* way is to first use the QR factorization. For more details
1064* how to implement this, see the " FULL SVD " branch.
1065 CALL claset('G', n, n-nr, czero, czero, v(1,nr+1), ldv)
1066 CALL cgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,
1067 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
1068*
1069 DO 1123 p = 1, n
1070 v(p,p) = conjg(v(p,p))
1071 DO 1124 q = p + 1, n
1072 ctmp = conjg(v(q,p))
1073 v(q,p) = conjg(v(p,q))
1074 v(p,q) = ctmp
1075 1124 CONTINUE
1076 1123 CONTINUE
1077 CALL clapmt( .false., n, n, v, ldv, iwork )
1078 END IF
1079*
1080 ELSE
1081* .. aply CGESVD to R
1082* .. copy R into V and overwrite V with the right singular vectors
1083 CALL clacpy( 'U', nr, n, a, lda, v, ldv )
1084 IF ( nr .GT. 1 )
1085 $ CALL claset( 'L', nr-1, nr-1, czero, czero, v(2,1), ldv )
1086* .. the right singular vectors overwrite V, the NR left singular
1087* vectors stored in U(1:NR,1:NR)
1088 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1089 CALL cgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,
1090 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
1091 CALL clapmt( .false., nr, n, v, ldv, iwork )
1092* .. now [V](1:NR,1:N) contains V(1:N,1:NR)**H
1093 ELSE
1094* .. need all N right singular vectors and NR < N
1095* [!] This is simple implementation that augments [V](1:NR,1:N)
1096* by padding a zero block. In the case NR << N, a more efficient
1097* way is to first use the LQ factorization. For more details
1098* how to implement this, see the " FULL SVD " branch.
1099 CALL claset('G', n-nr, n, czero,czero, v(nr+1,1), ldv)
1100 CALL cgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,
1101 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
1102 CALL clapmt( .false., n, n, v, ldv, iwork )
1103 END IF
1104* .. now [V] contains the adjoint of the matrix of the right singular
1105* vectors of A.
1106 END IF
1107*
1108 ELSE
1109*.......................................................................
1110* .. FULL SVD requested
1111*.......................................................................
1112 IF ( rtrans ) THEN
1113*
1114* .. apply CGESVD to R**H [[this option is left for R&D&T]]
1115*
1116 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1117* .. copy R**H into [V] and overwrite [V] with the left singular
1118* vectors of R**H
1119 DO 1168 p = 1, nr
1120 DO 1169 q = p, n
1121 v(q,p) = conjg(a(p,q))
1122 1169 CONTINUE
1123 1168 CONTINUE
1124 IF ( nr .GT. 1 )
1125 $ CALL claset( 'U', nr-1,nr-1, czero,czero, v(1,2), ldv )
1126*
1127* .. the left singular vectors of R**H overwrite [V], the NR right
1128* singular vectors of R**H stored in [U](1:NR,1:NR) as conjugate
1129* transposed
1130 CALL cgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,
1131 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
1132* .. assemble V
1133 DO 1115 p = 1, nr
1134 v(p,p) = conjg(v(p,p))
1135 DO 1116 q = p + 1, nr
1136 ctmp = conjg(v(q,p))
1137 v(q,p) = conjg(v(p,q))
1138 v(p,q) = ctmp
1139 1116 CONTINUE
1140 1115 CONTINUE
1141 IF ( nr .LT. n ) THEN
1142 DO 1101 p = 1, nr
1143 DO 1102 q = nr+1, n
1144 v(p,q) = conjg(v(q,p))
1145 1102 CONTINUE
1146 1101 CONTINUE
1147 END IF
1148 CALL clapmt( .false., nr, n, v, ldv, iwork )
1149*
1150 DO 1117 p = 1, nr
1151 u(p,p) = conjg(u(p,p))
1152 DO 1118 q = p + 1, nr
1153 ctmp = conjg(u(q,p))
1154 u(q,p) = conjg(u(p,q))
1155 u(p,q) = ctmp
1156 1118 CONTINUE
1157 1117 CONTINUE
1158*
1159 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1160 CALL claset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu)
1161 IF ( nr .LT. n1 ) THEN
1162 CALL claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1163 CALL claset( 'A',m-nr,n1-nr,czero,cone,
1164 $ u(nr+1,nr+1), ldu )
1165 END IF
1166 END IF
1167*
1168 ELSE
1169* .. need all N right singular vectors and NR < N
1170* .. copy R**H into [V] and overwrite [V] with the left singular
1171* vectors of R**H
1172* [[The optimal ratio N/NR for using QRF instead of padding
1173* with zeros. Here hard coded to 2; it must be at least
1174* two due to work space constraints.]]
1175* OPTRATIO = ILAENV(6, 'CGESVD', 'S' // 'O', NR,N,0,0)
1176* OPTRATIO = MAX( OPTRATIO, 2 )
1177 optratio = 2
1178 IF ( optratio*nr .GT. n ) THEN
1179 DO 1198 p = 1, nr
1180 DO 1199 q = p, n
1181 v(q,p) = conjg(a(p,q))
1182 1199 CONTINUE
1183 1198 CONTINUE
1184 IF ( nr .GT. 1 )
1185 $ CALL claset('U',nr-1,nr-1, czero,czero, v(1,2),ldv)
1186*
1187 CALL claset('A',n,n-nr,czero,czero,v(1,nr+1),ldv)
1188 CALL cgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,
1189 $ u, ldu, cwork(n+1), lcwork-n, rwork, info )
1190*
1191 DO 1113 p = 1, n
1192 v(p,p) = conjg(v(p,p))
1193 DO 1114 q = p + 1, n
1194 ctmp = conjg(v(q,p))
1195 v(q,p) = conjg(v(p,q))
1196 v(p,q) = ctmp
1197 1114 CONTINUE
1198 1113 CONTINUE
1199 CALL clapmt( .false., n, n, v, ldv, iwork )
1200* .. assemble the left singular vector matrix U of dimensions
1201* (M x N1), i.e. (M x N) or (M x M).
1202*
1203 DO 1111 p = 1, n
1204 u(p,p) = conjg(u(p,p))
1205 DO 1112 q = p + 1, n
1206 ctmp = conjg(u(q,p))
1207 u(q,p) = conjg(u(p,q))
1208 u(p,q) = ctmp
1209 1112 CONTINUE
1210 1111 CONTINUE
1211*
1212 IF ( ( n .LT. m ) .AND. .NOT.(wntuf)) THEN
1213 CALL claset('A',m-n,n,czero,czero,u(n+1,1),ldu)
1214 IF ( n .LT. n1 ) THEN
1215 CALL claset('A',n,n1-n,czero,czero,u(1,n+1),ldu)
1216 CALL claset('A',m-n,n1-n,czero,cone,
1217 $ u(n+1,n+1), ldu )
1218 END IF
1219 END IF
1220 ELSE
1221* .. copy R**H into [U] and overwrite [U] with the right
1222* singular vectors of R
1223 DO 1196 p = 1, nr
1224 DO 1197 q = p, n
1225 u(q,nr+p) = conjg(a(p,q))
1226 1197 CONTINUE
1227 1196 CONTINUE
1228 IF ( nr .GT. 1 )
1229 $ CALL claset('U',nr-1,nr-1,czero,czero,u(1,nr+2),ldu)
1230 CALL cgeqrf( n, nr, u(1,nr+1), ldu, cwork(n+1),
1231 $ cwork(n+nr+1), lcwork-n-nr, ierr )
1232 DO 1143 p = 1, nr
1233 DO 1144 q = 1, n
1234 v(q,p) = conjg(u(p,nr+q))
1235 1144 CONTINUE
1236 1143 CONTINUE
1237 CALL claset('U',nr-1,nr-1,czero,czero,v(1,2),ldv)
1238 CALL cgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,
1239 $ v,ldv, cwork(n+nr+1),lcwork-n-nr,rwork, info )
1240 CALL claset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv)
1241 CALL claset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv)
1242 CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1243 CALL cunmqr('R','C', n, n, nr, u(1,nr+1), ldu,
1244 $ cwork(n+1),v,ldv,cwork(n+nr+1),lcwork-n-nr,ierr)
1245 CALL clapmt( .false., n, n, v, ldv, iwork )
1246* .. assemble the left singular vector matrix U of dimensions
1247* (M x NR) or (M x N) or (M x M).
1248 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1249 CALL claset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu)
1250 IF ( nr .LT. n1 ) THEN
1251 CALL claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1252 CALL claset( 'A',m-nr,n1-nr,czero,cone,
1253 $ u(nr+1,nr+1),ldu)
1254 END IF
1255 END IF
1256 END IF
1257 END IF
1258*
1259 ELSE
1260*
1261* .. apply CGESVD to R [[this is the recommended option]]
1262*
1263 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1264* .. copy R into [V] and overwrite V with the right singular vectors
1265 CALL clacpy( 'U', nr, n, a, lda, v, ldv )
1266 IF ( nr .GT. 1 )
1267 $ CALL claset( 'L', nr-1,nr-1, czero,czero, v(2,1), ldv )
1268* .. the right singular vectors of R overwrite [V], the NR left
1269* singular vectors of R stored in [U](1:NR,1:NR)
1270 CALL cgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,
1271 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
1272 CALL clapmt( .false., nr, n, v, ldv, iwork )
1273* .. now [V](1:NR,1:N) contains V(1:N,1:NR)**H
1274* .. assemble the left singular vector matrix U of dimensions
1275* (M x NR) or (M x N) or (M x M).
1276 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1277 CALL claset('A', m-nr,nr, czero,czero, u(nr+1,1), ldu)
1278 IF ( nr .LT. n1 ) THEN
1279 CALL claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1280 CALL claset( 'A',m-nr,n1-nr,czero,cone,
1281 $ u(nr+1,nr+1), ldu )
1282 END IF
1283 END IF
1284*
1285 ELSE
1286* .. need all N right singular vectors and NR < N
1287* .. the requested number of the left singular vectors
1288* is then N1 (N or M)
1289* [[The optimal ratio N/NR for using LQ instead of padding
1290* with zeros. Here hard coded to 2; it must be at least
1291* two due to work space constraints.]]
1292* OPTRATIO = ILAENV(6, 'CGESVD', 'S' // 'O', NR,N,0,0)
1293* OPTRATIO = MAX( OPTRATIO, 2 )
1294 optratio = 2
1295 IF ( optratio * nr .GT. n ) THEN
1296 CALL clacpy( 'U', nr, n, a, lda, v, ldv )
1297 IF ( nr .GT. 1 )
1298 $ CALL claset('L', nr-1,nr-1, czero,czero, v(2,1),ldv)
1299* .. the right singular vectors of R overwrite [V], the NR left
1300* singular vectors of R stored in [U](1:NR,1:NR)
1301 CALL claset('A', n-nr,n, czero,czero, v(nr+1,1),ldv)
1302 CALL cgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,
1303 $ v, ldv, cwork(n+1), lcwork-n, rwork, info )
1304 CALL clapmt( .false., n, n, v, ldv, iwork )
1305* .. now [V] contains the adjoint of the matrix of the right
1306* singular vectors of A. The leading N left singular vectors
1307* are in [U](1:N,1:N)
1308* .. assemble the left singular vector matrix U of dimensions
1309* (M x N1), i.e. (M x N) or (M x M).
1310 IF ( ( n .LT. m ) .AND. .NOT.(wntuf)) THEN
1311 CALL claset('A',m-n,n,czero,czero,u(n+1,1),ldu)
1312 IF ( n .LT. n1 ) THEN
1313 CALL claset('A',n,n1-n,czero,czero,u(1,n+1),ldu)
1314 CALL claset( 'A',m-n,n1-n,czero,cone,
1315 $ u(n+1,n+1), ldu )
1316 END IF
1317 END IF
1318 ELSE
1319 CALL clacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu )
1320 IF ( nr .GT. 1 )
1321 $ CALL claset('L',nr-1,nr-1,czero,czero,u(nr+2,1),ldu)
1322 CALL cgelqf( nr, n, u(nr+1,1), ldu, cwork(n+1),
1323 $ cwork(n+nr+1), lcwork-n-nr, ierr )
1324 CALL clacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv)
1325 IF ( nr .GT. 1 )
1326 $ CALL claset('U',nr-1,nr-1,czero,czero,v(1,2),ldv)
1327 CALL cgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,
1328 $ v, ldv, cwork(n+nr+1), lcwork-n-nr, rwork, info )
1329 CALL claset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv)
1330 CALL claset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv)
1331 CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv)
1332 CALL cunmlq('R','N',n,n,nr,u(nr+1,1),ldu,cwork(n+1),
1333 $ v, ldv, cwork(n+nr+1),lcwork-n-nr,ierr)
1334 CALL clapmt( .false., n, n, v, ldv, iwork )
1335* .. assemble the left singular vector matrix U of dimensions
1336* (M x NR) or (M x N) or (M x M).
1337 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1338 CALL claset('A',m-nr,nr,czero,czero,u(nr+1,1),ldu)
1339 IF ( nr .LT. n1 ) THEN
1340 CALL claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
1341 CALL claset( 'A',m-nr,n1-nr,czero,cone,
1342 $ u(nr+1,nr+1), ldu )
1343 END IF
1344 END IF
1345 END IF
1346 END IF
1347* .. end of the "R**H or R" branch
1348 END IF
1349*
1350* The Q matrix from the first QRF is built into the left singular
1351* vectors matrix U.
1352*
1353 IF ( .NOT. wntuf )
1354 $ CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1355 $ ldu, cwork(n+1), lcwork-n, ierr )
1356 IF ( rowprm .AND. .NOT.wntuf )
1357 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(n+1), -1 )
1358*
1359* ... end of the "full SVD" branch
1360 END IF
1361*
1362* Check whether some singular values are returned as zeros, e.g.
1363* due to underflow, and update the numerical rank.
1364 p = nr
1365 DO 4001 q = p, 1, -1
1366 IF ( s(q) .GT. zero ) GO TO 4002
1367 nr = nr - 1
1368 4001 CONTINUE
1369 4002 CONTINUE
1370*
1371* .. if numerical rank deficiency is detected, the truncated
1372* singular values are set to zero.
1373 IF ( nr .LT. n ) CALL slaset( 'G', n-nr,1, zero,zero, s(nr+1), n )
1374* .. undo scaling; this may cause overflow in the largest singular
1375* values.
1376 IF ( ascaled )
1377 $ CALL slascl( 'G',0,0, one,sqrt(real(m)), nr,1, s, n, ierr )
1378 IF ( conda ) rwork(1) = sconda
1379 rwork(2) = p - nr
1380* .. p-NR is the number of singular values that are computed as
1381* exact zeros in CGESVD() applied to the (possibly truncated)
1382* full row rank triangular (trapezoidal) factor of A.
1383 numrank = nr
1384*
1385 RETURN
1386*
1387* End of CGESVDQ
1388*
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:110
subroutine cgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, info)
CGESVD computes the singular value decomposition (SVD) for GE matrices
Definition cgesvd.f:214
subroutine clapmt(forwrd, m, n, x, ldx, k)
CLAPMT performs a forward or backward permutation of the columns of a matrix.
Definition clapmt.f:104

◆ cgesvdx()

subroutine cgesvdx ( character jobu,
character jobvt,
character range,
integer m,
integer n,
complex, dimension( lda, * ) a,
integer lda,
real vl,
real vu,
integer il,
integer iu,
integer ns,
real, dimension( * ) s,
complex, dimension( ldu, * ) u,
integer ldu,
complex, dimension( ldvt, * ) vt,
integer ldvt,
complex, dimension( * ) work,
integer lwork,
real, dimension( * ) rwork,
integer, dimension( * ) iwork,
integer info )

CGESVDX computes the singular value decomposition (SVD) for GE matrices

Download CGESVDX + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!>  CGESVDX computes the singular value decomposition (SVD) of a complex
!>  M-by-N matrix A, optionally computing the left and/or right singular
!>  vectors. The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!>  where SIGMA is an M-by-N matrix which is zero except for its
!>  min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
!>  V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
!>  are the singular values of A; they are real and non-negative, and
!>  are returned in descending order.  The first min(m,n) columns of
!>  U and V are the left and right singular vectors of A.
!>
!>  CGESVDX uses an eigenvalue problem for obtaining the SVD, which
!>  allows for the computation of a subset of singular values and
!>  vectors. See SBDSVDX for details.
!>
!>  Note that the routine returns V**T, not V.
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'V':  the first min(m,n) columns of U (the left singular
!>                  vectors) or as specified by RANGE are returned in
!>                  the array U;
!>          = 'N':  no columns of U (no left singular vectors) are
!>                  computed.
!> 
[in]JOBVT
!>          JOBVT is CHARACTER*1
!>           Specifies options for computing all or part of the matrix
!>           V**T:
!>           = 'V':  the first min(m,n) rows of V**T (the right singular
!>                   vectors) or as specified by RANGE are returned in
!>                   the array VT;
!>           = 'N':  no rows of V**T (no right singular vectors) are
!>                   computed.
!> 
[in]RANGE
!>          RANGE is CHARACTER*1
!>          = 'A': all singular values will be found.
!>          = 'V': all singular values in the half-open interval (VL,VU]
!>                 will be found.
!>          = 'I': the IL-th through IU-th singular values will be found.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is COMPLEX array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit, the contents of A are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[in]VL
!>          VL is REAL
!>          If RANGE='V', the lower bound of the interval to
!>          be searched for singular values. VU > VL.
!>          Not referenced if RANGE = 'A' or 'I'.
!> 
[in]VU
!>          VU is REAL
!>          If RANGE='V', the upper bound of the interval to
!>          be searched for singular values. VU > VL.
!>          Not referenced if RANGE = 'A' or 'I'.
!> 
[in]IL
!>          IL is INTEGER
!>          If RANGE='I', the index of the
!>          smallest singular value to be returned.
!>          1 <= IL <= IU <= min(M,N), if min(M,N) > 0.
!>          Not referenced if RANGE = 'A' or 'V'.
!> 
[in]IU
!>          IU is INTEGER
!>          If RANGE='I', the index of the
!>          largest singular value to be returned.
!>          1 <= IL <= IU <= min(M,N), if min(M,N) > 0.
!>          Not referenced if RANGE = 'A' or 'V'.
!> 
[out]NS
!>          NS is INTEGER
!>          The total number of singular values found,
!>          0 <= NS <= min(M,N).
!>          If RANGE = 'A', NS = min(M,N); if RANGE = 'I', NS = IU-IL+1.
!> 
[out]S
!>          S is REAL array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is COMPLEX array, dimension (LDU,UCOL)
!>          If JOBU = 'V', U contains columns of U (the left singular
!>          vectors, stored columnwise) as specified by RANGE; if
!>          JOBU = 'N', U is not referenced.
!>          Note: The user must ensure that UCOL >= NS; if RANGE = 'V',
!>          the exact value of NS is not known in advance and an upper
!>          bound must be used.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBU = 'V', LDU >= M.
!> 
[out]VT
!>          VT is COMPLEX array, dimension (LDVT,N)
!>          If JOBVT = 'V', VT contains the rows of V**T (the right singular
!>          vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N',
!>          VT is not referenced.
!>          Note: The user must ensure that LDVT >= NS; if RANGE = 'V',
!>          the exact value of NS is not known in advance and an upper
!>          bound must be used.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1; if
!>          JOBVT = 'V', LDVT >= NS (see above).
!> 
[out]WORK
!>          WORK is COMPLEX array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see
!>          comments inside the code):
!>             - PATH 1  (M much larger than N)
!>             - PATH 1t (N much larger than M)
!>          LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths.
!>          For good performance, LWORK should generally be larger.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is REAL array, dimension (MAX(1,LRWORK))
!>          LRWORK >= MIN(M,N)*(MIN(M,N)*2+15*MIN(M,N)).
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (12*MIN(M,N))
!>          If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0,
!>          then IWORK contains the indices of the eigenvectors that failed
!>          to converge in SBDSVDX/SSTEVX.
!> 
[out]INFO
!>     INFO is INTEGER
!>           = 0:  successful exit
!>           < 0:  if INFO = -i, the i-th argument had an illegal value
!>           > 0:  if INFO = i, then i eigenvectors failed to converge
!>                 in SBDSVDX/SSTEVX.
!>                 if INFO = N*2 + 1, an internal error occurred in
!>                 SBDSVDX
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 267 of file cgesvdx.f.

270*
271* -- LAPACK driver routine --
272* -- LAPACK is a software package provided by Univ. of Tennessee, --
273* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
274*
275* .. Scalar Arguments ..
276 CHARACTER JOBU, JOBVT, RANGE
277 INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS
278 REAL VL, VU
279* ..
280* .. Array Arguments ..
281 INTEGER IWORK( * )
282 REAL S( * ), RWORK( * )
283 COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
284 $ WORK( * )
285* ..
286*
287* =====================================================================
288*
289* .. Parameters ..
290 COMPLEX CZERO, CONE
291 parameter( czero = ( 0.0e0, 0.0e0 ),
292 $ cone = ( 1.0e0, 0.0e0 ) )
293 REAL ZERO, ONE
294 parameter( zero = 0.0e0, one = 1.0e0 )
295* ..
296* .. Local Scalars ..
297 CHARACTER JOBZ, RNGTGK
298 LOGICAL ALLS, INDS, LQUERY, VALS, WANTU, WANTVT
299 INTEGER I, ID, IE, IERR, ILQF, ILTGK, IQRF, ISCL,
300 $ ITAU, ITAUP, ITAUQ, ITEMP, ITEMPR, ITGKZ,
301 $ IUTGK, J, K, MAXWRK, MINMN, MINWRK, MNTHR
302 REAL ABSTOL, ANRM, BIGNUM, EPS, SMLNUM
303* ..
304* .. Local Arrays ..
305 REAL DUM( 1 )
306* ..
307* .. External Subroutines ..
308 EXTERNAL cgebrd, cgelqf, cgeqrf, clascl, claset,
311* ..
312* .. External Functions ..
313 LOGICAL LSAME
314 INTEGER ILAENV
315 REAL SLAMCH, CLANGE
316 EXTERNAL lsame, ilaenv, slamch, clange
317* ..
318* .. Intrinsic Functions ..
319 INTRINSIC max, min, sqrt
320* ..
321* .. Executable Statements ..
322*
323* Test the input arguments.
324*
325 ns = 0
326 info = 0
327 abstol = 2*slamch('S')
328 lquery = ( lwork.EQ.-1 )
329 minmn = min( m, n )
330
331 wantu = lsame( jobu, 'V' )
332 wantvt = lsame( jobvt, 'V' )
333 IF( wantu .OR. wantvt ) THEN
334 jobz = 'V'
335 ELSE
336 jobz = 'N'
337 END IF
338 alls = lsame( range, 'A' )
339 vals = lsame( range, 'V' )
340 inds = lsame( range, 'I' )
341*
342 info = 0
343 IF( .NOT.lsame( jobu, 'V' ) .AND.
344 $ .NOT.lsame( jobu, 'N' ) ) THEN
345 info = -1
346 ELSE IF( .NOT.lsame( jobvt, 'V' ) .AND.
347 $ .NOT.lsame( jobvt, 'N' ) ) THEN
348 info = -2
349 ELSE IF( .NOT.( alls .OR. vals .OR. inds ) ) THEN
350 info = -3
351 ELSE IF( m.LT.0 ) THEN
352 info = -4
353 ELSE IF( n.LT.0 ) THEN
354 info = -5
355 ELSE IF( m.GT.lda ) THEN
356 info = -7
357 ELSE IF( minmn.GT.0 ) THEN
358 IF( vals ) THEN
359 IF( vl.LT.zero ) THEN
360 info = -8
361 ELSE IF( vu.LE.vl ) THEN
362 info = -9
363 END IF
364 ELSE IF( inds ) THEN
365 IF( il.LT.1 .OR. il.GT.max( 1, minmn ) ) THEN
366 info = -10
367 ELSE IF( iu.LT.min( minmn, il ) .OR. iu.GT.minmn ) THEN
368 info = -11
369 END IF
370 END IF
371 IF( info.EQ.0 ) THEN
372 IF( wantu .AND. ldu.LT.m ) THEN
373 info = -15
374 ELSE IF( wantvt ) THEN
375 IF( inds ) THEN
376 IF( ldvt.LT.iu-il+1 ) THEN
377 info = -17
378 END IF
379 ELSE IF( ldvt.LT.minmn ) THEN
380 info = -17
381 END IF
382 END IF
383 END IF
384 END IF
385*
386* Compute workspace
387* (Note: Comments in the code beginning "Workspace:" describe the
388* minimal amount of workspace needed at that point in the code,
389* as well as the preferred amount for good performance.
390* NB refers to the optimal block size for the immediately
391* following subroutine, as returned by ILAENV.)
392*
393 IF( info.EQ.0 ) THEN
394 minwrk = 1
395 maxwrk = 1
396 IF( minmn.GT.0 ) THEN
397 IF( m.GE.n ) THEN
398 mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
399 IF( m.GE.mnthr ) THEN
400*
401* Path 1 (M much larger than N)
402*
403 minwrk = n*(n+5)
404 maxwrk = n + n*ilaenv(1,'CGEQRF',' ',m,n,-1,-1)
405 maxwrk = max(maxwrk,
406 $ n*n+2*n+2*n*ilaenv(1,'CGEBRD',' ',n,n,-1,-1))
407 IF (wantu .OR. wantvt) THEN
408 maxwrk = max(maxwrk,
409 $ n*n+2*n+n*ilaenv(1,'CUNMQR','LN',n,n,n,-1))
410 END IF
411 ELSE
412*
413* Path 2 (M at least N, but not much larger)
414*
415 minwrk = 3*n + m
416 maxwrk = 2*n + (m+n)*ilaenv(1,'CGEBRD',' ',m,n,-1,-1)
417 IF (wantu .OR. wantvt) THEN
418 maxwrk = max(maxwrk,
419 $ 2*n+n*ilaenv(1,'CUNMQR','LN',n,n,n,-1))
420 END IF
421 END IF
422 ELSE
423 mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
424 IF( n.GE.mnthr ) THEN
425*
426* Path 1t (N much larger than M)
427*
428 minwrk = m*(m+5)
429 maxwrk = m + m*ilaenv(1,'CGELQF',' ',m,n,-1,-1)
430 maxwrk = max(maxwrk,
431 $ m*m+2*m+2*m*ilaenv(1,'CGEBRD',' ',m,m,-1,-1))
432 IF (wantu .OR. wantvt) THEN
433 maxwrk = max(maxwrk,
434 $ m*m+2*m+m*ilaenv(1,'CUNMQR','LN',m,m,m,-1))
435 END IF
436 ELSE
437*
438* Path 2t (N greater than M, but not much larger)
439*
440*
441 minwrk = 3*m + n
442 maxwrk = 2*m + (m+n)*ilaenv(1,'CGEBRD',' ',m,n,-1,-1)
443 IF (wantu .OR. wantvt) THEN
444 maxwrk = max(maxwrk,
445 $ 2*m+m*ilaenv(1,'CUNMQR','LN',m,m,m,-1))
446 END IF
447 END IF
448 END IF
449 END IF
450 maxwrk = max( maxwrk, minwrk )
451 work( 1 ) = cmplx( real( maxwrk ), zero )
452*
453 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
454 info = -19
455 END IF
456 END IF
457*
458 IF( info.NE.0 ) THEN
459 CALL xerbla( 'CGESVDX', -info )
460 RETURN
461 ELSE IF( lquery ) THEN
462 RETURN
463 END IF
464*
465* Quick return if possible
466*
467 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
468 RETURN
469 END IF
470*
471* Set singular values indices accord to RANGE='A'.
472*
473 IF( alls ) THEN
474 rngtgk = 'I'
475 iltgk = 1
476 iutgk = min( m, n )
477 ELSE IF( inds ) THEN
478 rngtgk = 'I'
479 iltgk = il
480 iutgk = iu
481 ELSE
482 rngtgk = 'V'
483 iltgk = 0
484 iutgk = 0
485 END IF
486*
487* Get machine constants
488*
489 eps = slamch( 'P' )
490 smlnum = sqrt( slamch( 'S' ) ) / eps
491 bignum = one / smlnum
492*
493* Scale A if max element outside range [SMLNUM,BIGNUM]
494*
495 anrm = clange( 'M', m, n, a, lda, dum )
496 iscl = 0
497 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
498 iscl = 1
499 CALL clascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, info )
500 ELSE IF( anrm.GT.bignum ) THEN
501 iscl = 1
502 CALL clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info )
503 END IF
504*
505 IF( m.GE.n ) THEN
506*
507* A has at least as many rows as columns. If A has sufficiently
508* more rows than columns, first reduce A using the QR
509* decomposition.
510*
511 IF( m.GE.mnthr ) THEN
512*
513* Path 1 (M much larger than N):
514* A = Q * R = Q * ( QB * B * PB**T )
515* = Q * ( QB * ( UB * S * VB**T ) * PB**T )
516* U = Q * QB * UB; V**T = VB**T * PB**T
517*
518* Compute A=Q*R
519* (Workspace: need 2*N, prefer N+N*NB)
520*
521 itau = 1
522 itemp = itau + n
523 CALL cgeqrf( m, n, a, lda, work( itau ), work( itemp ),
524 $ lwork-itemp+1, info )
525*
526* Copy R into WORK and bidiagonalize it:
527* (Workspace: need N*N+3*N, prefer N*N+N+2*N*NB)
528*
529 iqrf = itemp
530 itauq = itemp + n*n
531 itaup = itauq + n
532 itemp = itaup + n
533 id = 1
534 ie = id + n
535 itgkz = ie + n
536 CALL clacpy( 'U', n, n, a, lda, work( iqrf ), n )
537 CALL claset( 'L', n-1, n-1, czero, czero,
538 $ work( iqrf+1 ), n )
539 CALL cgebrd( n, n, work( iqrf ), n, rwork( id ),
540 $ rwork( ie ), work( itauq ), work( itaup ),
541 $ work( itemp ), lwork-itemp+1, info )
542 itempr = itgkz + n*(n*2+1)
543*
544* Solve eigenvalue problem TGK*Z=Z*S.
545* (Workspace: need 2*N*N+14*N)
546*
547 CALL sbdsvdx( 'U', jobz, rngtgk, n, rwork( id ),
548 $ rwork( ie ), vl, vu, iltgk, iutgk, ns, s,
549 $ rwork( itgkz ), n*2, rwork( itempr ),
550 $ iwork, info)
551*
552* If needed, compute left singular vectors.
553*
554 IF( wantu ) THEN
555 k = itgkz
556 DO i = 1, ns
557 DO j = 1, n
558 u( j, i ) = cmplx( rwork( k ), zero )
559 k = k + 1
560 END DO
561 k = k + n
562 END DO
563 CALL claset( 'A', m-n, ns, czero, czero, u( n+1,1 ), ldu)
564*
565* Call CUNMBR to compute QB*UB.
566* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
567*
568 CALL cunmbr( 'Q', 'L', 'N', n, ns, n, work( iqrf ), n,
569 $ work( itauq ), u, ldu, work( itemp ),
570 $ lwork-itemp+1, info )
571*
572* Call CUNMQR to compute Q*(QB*UB).
573* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
574*
575 CALL cunmqr( 'L', 'N', m, ns, n, a, lda,
576 $ work( itau ), u, ldu, work( itemp ),
577 $ lwork-itemp+1, info )
578 END IF
579*
580* If needed, compute right singular vectors.
581*
582 IF( wantvt) THEN
583 k = itgkz + n
584 DO i = 1, ns
585 DO j = 1, n
586 vt( i, j ) = cmplx( rwork( k ), zero )
587 k = k + 1
588 END DO
589 k = k + n
590 END DO
591*
592* Call CUNMBR to compute VB**T * PB**T
593* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
594*
595 CALL cunmbr( 'P', 'R', 'C', ns, n, n, work( iqrf ), n,
596 $ work( itaup ), vt, ldvt, work( itemp ),
597 $ lwork-itemp+1, info )
598 END IF
599 ELSE
600*
601* Path 2 (M at least N, but not much larger)
602* Reduce A to bidiagonal form without QR decomposition
603* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T
604* U = QB * UB; V**T = VB**T * PB**T
605*
606* Bidiagonalize A
607* (Workspace: need 2*N+M, prefer 2*N+(M+N)*NB)
608*
609 itauq = 1
610 itaup = itauq + n
611 itemp = itaup + n
612 id = 1
613 ie = id + n
614 itgkz = ie + n
615 CALL cgebrd( m, n, a, lda, rwork( id ), rwork( ie ),
616 $ work( itauq ), work( itaup ), work( itemp ),
617 $ lwork-itemp+1, info )
618 itempr = itgkz + n*(n*2+1)
619*
620* Solve eigenvalue problem TGK*Z=Z*S.
621* (Workspace: need 2*N*N+14*N)
622*
623 CALL sbdsvdx( 'U', jobz, rngtgk, n, rwork( id ),
624 $ rwork( ie ), vl, vu, iltgk, iutgk, ns, s,
625 $ rwork( itgkz ), n*2, rwork( itempr ),
626 $ iwork, info)
627*
628* If needed, compute left singular vectors.
629*
630 IF( wantu ) THEN
631 k = itgkz
632 DO i = 1, ns
633 DO j = 1, n
634 u( j, i ) = cmplx( rwork( k ), zero )
635 k = k + 1
636 END DO
637 k = k + n
638 END DO
639 CALL claset( 'A', m-n, ns, czero, czero, u( n+1,1 ), ldu)
640*
641* Call CUNMBR to compute QB*UB.
642* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
643*
644 CALL cunmbr( 'Q', 'L', 'N', m, ns, n, a, lda,
645 $ work( itauq ), u, ldu, work( itemp ),
646 $ lwork-itemp+1, ierr )
647 END IF
648*
649* If needed, compute right singular vectors.
650*
651 IF( wantvt) THEN
652 k = itgkz + n
653 DO i = 1, ns
654 DO j = 1, n
655 vt( i, j ) = cmplx( rwork( k ), zero )
656 k = k + 1
657 END DO
658 k = k + n
659 END DO
660*
661* Call CUNMBR to compute VB**T * PB**T
662* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
663*
664 CALL cunmbr( 'P', 'R', 'C', ns, n, n, a, lda,
665 $ work( itaup ), vt, ldvt, work( itemp ),
666 $ lwork-itemp+1, ierr )
667 END IF
668 END IF
669 ELSE
670*
671* A has more columns than rows. If A has sufficiently more
672* columns than rows, first reduce A using the LQ decomposition.
673*
674 IF( n.GE.mnthr ) THEN
675*
676* Path 1t (N much larger than M):
677* A = L * Q = ( QB * B * PB**T ) * Q
678* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q
679* U = QB * UB ; V**T = VB**T * PB**T * Q
680*
681* Compute A=L*Q
682* (Workspace: need 2*M, prefer M+M*NB)
683*
684 itau = 1
685 itemp = itau + m
686 CALL cgelqf( m, n, a, lda, work( itau ), work( itemp ),
687 $ lwork-itemp+1, info )
688
689* Copy L into WORK and bidiagonalize it:
690* (Workspace in WORK( ITEMP ): need M*M+3*M, prefer M*M+M+2*M*NB)
691*
692 ilqf = itemp
693 itauq = ilqf + m*m
694 itaup = itauq + m
695 itemp = itaup + m
696 id = 1
697 ie = id + m
698 itgkz = ie + m
699 CALL clacpy( 'L', m, m, a, lda, work( ilqf ), m )
700 CALL claset( 'U', m-1, m-1, czero, czero,
701 $ work( ilqf+m ), m )
702 CALL cgebrd( m, m, work( ilqf ), m, rwork( id ),
703 $ rwork( ie ), work( itauq ), work( itaup ),
704 $ work( itemp ), lwork-itemp+1, info )
705 itempr = itgkz + m*(m*2+1)
706*
707* Solve eigenvalue problem TGK*Z=Z*S.
708* (Workspace: need 2*M*M+14*M)
709*
710 CALL sbdsvdx( 'U', jobz, rngtgk, m, rwork( id ),
711 $ rwork( ie ), vl, vu, iltgk, iutgk, ns, s,
712 $ rwork( itgkz ), m*2, rwork( itempr ),
713 $ iwork, info)
714*
715* If needed, compute left singular vectors.
716*
717 IF( wantu ) THEN
718 k = itgkz
719 DO i = 1, ns
720 DO j = 1, m
721 u( j, i ) = cmplx( rwork( k ), zero )
722 k = k + 1
723 END DO
724 k = k + m
725 END DO
726*
727* Call CUNMBR to compute QB*UB.
728* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
729*
730 CALL cunmbr( 'Q', 'L', 'N', m, ns, m, work( ilqf ), m,
731 $ work( itauq ), u, ldu, work( itemp ),
732 $ lwork-itemp+1, info )
733 END IF
734*
735* If needed, compute right singular vectors.
736*
737 IF( wantvt) THEN
738 k = itgkz + m
739 DO i = 1, ns
740 DO j = 1, m
741 vt( i, j ) = cmplx( rwork( k ), zero )
742 k = k + 1
743 END DO
744 k = k + m
745 END DO
746 CALL claset( 'A', ns, n-m, czero, czero,
747 $ vt( 1,m+1 ), ldvt )
748*
749* Call CUNMBR to compute (VB**T)*(PB**T)
750* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
751*
752 CALL cunmbr( 'P', 'R', 'C', ns, m, m, work( ilqf ), m,
753 $ work( itaup ), vt, ldvt, work( itemp ),
754 $ lwork-itemp+1, info )
755*
756* Call CUNMLQ to compute ((VB**T)*(PB**T))*Q.
757* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
758*
759 CALL cunmlq( 'R', 'N', ns, n, m, a, lda,
760 $ work( itau ), vt, ldvt, work( itemp ),
761 $ lwork-itemp+1, info )
762 END IF
763 ELSE
764*
765* Path 2t (N greater than M, but not much larger)
766* Reduce to bidiagonal form without LQ decomposition
767* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T
768* U = QB * UB; V**T = VB**T * PB**T
769*
770* Bidiagonalize A
771* (Workspace: need 2*M+N, prefer 2*M+(M+N)*NB)
772*
773 itauq = 1
774 itaup = itauq + m
775 itemp = itaup + m
776 id = 1
777 ie = id + m
778 itgkz = ie + m
779 CALL cgebrd( m, n, a, lda, rwork( id ), rwork( ie ),
780 $ work( itauq ), work( itaup ), work( itemp ),
781 $ lwork-itemp+1, info )
782 itempr = itgkz + m*(m*2+1)
783*
784* Solve eigenvalue problem TGK*Z=Z*S.
785* (Workspace: need 2*M*M+14*M)
786*
787 CALL sbdsvdx( 'L', jobz, rngtgk, m, rwork( id ),
788 $ rwork( ie ), vl, vu, iltgk, iutgk, ns, s,
789 $ rwork( itgkz ), m*2, rwork( itempr ),
790 $ iwork, info)
791*
792* If needed, compute left singular vectors.
793*
794 IF( wantu ) THEN
795 k = itgkz
796 DO i = 1, ns
797 DO j = 1, m
798 u( j, i ) = cmplx( rwork( k ), zero )
799 k = k + 1
800 END DO
801 k = k + m
802 END DO
803*
804* Call CUNMBR to compute QB*UB.
805* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
806*
807 CALL cunmbr( 'Q', 'L', 'N', m, ns, n, a, lda,
808 $ work( itauq ), u, ldu, work( itemp ),
809 $ lwork-itemp+1, info )
810 END IF
811*
812* If needed, compute right singular vectors.
813*
814 IF( wantvt) THEN
815 k = itgkz + m
816 DO i = 1, ns
817 DO j = 1, m
818 vt( i, j ) = cmplx( rwork( k ), zero )
819 k = k + 1
820 END DO
821 k = k + m
822 END DO
823 CALL claset( 'A', ns, n-m, czero, czero,
824 $ vt( 1,m+1 ), ldvt )
825*
826* Call CUNMBR to compute VB**T * PB**T
827* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
828*
829 CALL cunmbr( 'P', 'R', 'C', ns, n, m, a, lda,
830 $ work( itaup ), vt, ldvt, work( itemp ),
831 $ lwork-itemp+1, info )
832 END IF
833 END IF
834 END IF
835*
836* Undo scaling if necessary
837*
838 IF( iscl.EQ.1 ) THEN
839 IF( anrm.GT.bignum )
840 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1,
841 $ s, minmn, info )
842 IF( anrm.LT.smlnum )
843 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1,
844 $ s, minmn, info )
845 END IF
846*
847* Return optimal workspace in WORK(1)
848*
849 work( 1 ) = cmplx( real( maxwrk ), zero )
850*
851 RETURN
852*
853* End of CGESVDX
854*
subroutine sbdsvdx(uplo, jobz, range, n, d, e, vl, vu, il, iu, ns, s, z, ldz, work, iwork, info)
SBDSVDX
Definition sbdsvdx.f:226
initmumps id

◆ cggsvd3()

subroutine cggsvd3 ( character jobu,
character jobv,
character jobq,
integer m,
integer n,
integer p,
integer k,
integer l,
complex, dimension( lda, * ) a,
integer lda,
complex, dimension( ldb, * ) b,
integer ldb,
real, dimension( * ) alpha,
real, dimension( * ) beta,
complex, dimension( ldu, * ) u,
integer ldu,
complex, dimension( ldv, * ) v,
integer ldv,
complex, dimension( ldq, * ) q,
integer ldq,
complex, dimension( * ) work,
integer lwork,
real, dimension( * ) rwork,
integer, dimension( * ) iwork,
integer info )

CGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices

Download CGGSVD3 + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> CGGSVD3 computes the generalized singular value decomposition (GSVD)
!> of an M-by-N complex matrix A and P-by-N complex matrix B:
!>
!>       U**H*A*Q = D1*( 0 R ),    V**H*B*Q = D2*( 0 R )
!>
!> where U, V and Q are unitary matrices.
!> Let K+L = the effective numerical rank of the
!> matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper
!> triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) 
!> matrices and of the following structures, respectively:
!>
!> If M-K-L >= 0,
!>
!>                     K  L
!>        D1 =     K ( I  0 )
!>                 L ( 0  C )
!>             M-K-L ( 0  0 )
!>
!>                   K  L
!>        D2 =   L ( 0  S )
!>             P-L ( 0  0 )
!>
!>                 N-K-L  K    L
!>   ( 0 R ) = K (  0   R11  R12 )
!>             L (  0    0   R22 )
!>
!> where
!>
!>   C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
!>   S = diag( BETA(K+1),  ... , BETA(K+L) ),
!>   C**2 + S**2 = I.
!>
!>   R is stored in A(1:K+L,N-K-L+1:N) on exit.
!>
!> If M-K-L < 0,
!>
!>                   K M-K K+L-M
!>        D1 =   K ( I  0    0   )
!>             M-K ( 0  C    0   )
!>
!>                     K M-K K+L-M
!>        D2 =   M-K ( 0  S    0  )
!>             K+L-M ( 0  0    I  )
!>               P-L ( 0  0    0  )
!>
!>                    N-K-L  K   M-K  K+L-M
!>   ( 0 R ) =     K ( 0    R11  R12  R13  )
!>               M-K ( 0     0   R22  R23  )
!>             K+L-M ( 0     0    0   R33  )
!>
!> where
!>
!>   C = diag( ALPHA(K+1), ... , ALPHA(M) ),
!>   S = diag( BETA(K+1),  ... , BETA(M) ),
!>   C**2 + S**2 = I.
!>
!>   (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored
!>   ( 0  R22 R23 )
!>   in B(M-K+1:L,N+M-K-L+1:N) on exit.
!>
!> The routine computes C, S, R, and optionally the unitary
!> transformation matrices U, V and Q.
!>
!> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of
!> A and B implicitly gives the SVD of A*inv(B):
!>                      A*inv(B) = U*(D1*inv(D2))*V**H.
!> If ( A**H,B**H)**H has orthonormal columns, then the GSVD of A and B is also
!> equal to the CS decomposition of A and B. Furthermore, the GSVD can
!> be used to derive the solution of the eigenvalue problem:
!>                      A**H*A x = lambda* B**H*B x.
!> In some literature, the GSVD of A and B is presented in the form
!>                  U**H*A*X = ( 0 D1 ),   V**H*B*X = ( 0 D2 )
!> where U and V are orthogonal and X is nonsingular, and D1 and D2 are
!> ``diagonal''.  The former GSVD form can be converted to the latter
!> form by taking the nonsingular matrix X as
!>
!>                       X = Q*(  I   0    )
!>                             (  0 inv(R) )
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          = 'U':  Unitary matrix U is computed;
!>          = 'N':  U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>          = 'V':  Unitary matrix V is computed;
!>          = 'N':  V is not computed.
!> 
[in]JOBQ
!>          JOBQ is CHARACTER*1
!>          = 'Q':  Unitary matrix Q is computed;
!>          = 'N':  Q is not computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the matrices A and B.  N >= 0.
!> 
[in]P
!>          P is INTEGER
!>          The number of rows of the matrix B.  P >= 0.
!> 
[out]K
!>          K is INTEGER
!> 
[out]L
!>          L is INTEGER
!>
!>          On exit, K and L specify the dimension of the subblocks
!>          described in Purpose.
!>          K + L = effective numerical rank of (A**H,B**H)**H.
!> 
[in,out]A
!>          A is COMPLEX array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit, A contains the triangular matrix R, or part of R.
!>          See Purpose for details.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A. LDA >= max(1,M).
!> 
[in,out]B
!>          B is COMPLEX array, dimension (LDB,N)
!>          On entry, the P-by-N matrix B.
!>          On exit, B contains part of the triangular matrix R if
!>          M-K-L < 0.  See Purpose for details.
!> 
[in]LDB
!>          LDB is INTEGER
!>          The leading dimension of the array B. LDB >= max(1,P).
!> 
[out]ALPHA
!>          ALPHA is REAL array, dimension (N)
!> 
[out]BETA
!>          BETA is REAL array, dimension (N)
!>
!>          On exit, ALPHA and BETA contain the generalized singular
!>          value pairs of A and B;
!>            ALPHA(1:K) = 1,
!>            BETA(1:K)  = 0,
!>          and if M-K-L >= 0,
!>            ALPHA(K+1:K+L) = C,
!>            BETA(K+1:K+L)  = S,
!>          or if M-K-L < 0,
!>            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0
!>            BETA(K+1:M) =S, BETA(M+1:K+L) =1
!>          and
!>            ALPHA(K+L+1:N) = 0
!>            BETA(K+L+1:N)  = 0
!> 
[out]U
!>          U is COMPLEX array, dimension (LDU,M)
!>          If JOBU = 'U', U contains the M-by-M unitary matrix U.
!>          If JOBU = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U. LDU >= max(1,M) if
!>          JOBU = 'U'; LDU >= 1 otherwise.
!> 
[out]V
!>          V is COMPLEX array, dimension (LDV,P)
!>          If JOBV = 'V', V contains the P-by-P unitary matrix V.
!>          If JOBV = 'N', V is not referenced.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V. LDV >= max(1,P) if
!>          JOBV = 'V'; LDV >= 1 otherwise.
!> 
[out]Q
!>          Q is COMPLEX array, dimension (LDQ,N)
!>          If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q.
!>          If JOBQ = 'N', Q is not referenced.
!> 
[in]LDQ
!>          LDQ is INTEGER
!>          The leading dimension of the array Q. LDQ >= max(1,N) if
!>          JOBQ = 'Q'; LDQ >= 1 otherwise.
!> 
[out]WORK
!>          WORK is COMPLEX array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is REAL array, dimension (2*N)
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (N)
!>          On exit, IWORK stores the sorting information. More
!>          precisely, the following loop will sort ALPHA
!>             for I = K+1, min(M,K+L)
!>                 swap ALPHA(I) and ALPHA(IWORK(I))
!>             endfor
!>          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N).
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if INFO = 1, the Jacobi-type procedure failed to
!>                converge.  For further details, see subroutine CTGSJA.
!> 
Internal Parameters:
!>  TOLA    REAL
!>  TOLB    REAL
!>          TOLA and TOLB are the thresholds to determine the effective
!>          rank of (A**H,B**H)**H. Generally, they are set to
!>                   TOLA = MAX(M,N)*norm(A)*MACHEPS,
!>                   TOLB = MAX(P,N)*norm(B)*MACHEPS.
!>          The size of TOLA and TOLB may affect the size of backward
!>          errors of the decomposition.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA
Further Details:
CGGSVD3 replaces the deprecated subroutine CGGSVD.

Definition at line 351 of file cggsvd3.f.

354*
355* -- LAPACK driver routine --
356* -- LAPACK is a software package provided by Univ. of Tennessee, --
357* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
358*
359* .. Scalar Arguments ..
360 CHARACTER JOBQ, JOBU, JOBV
361 INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P,
362 $ LWORK
363* ..
364* .. Array Arguments ..
365 INTEGER IWORK( * )
366 REAL ALPHA( * ), BETA( * ), RWORK( * )
367 COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
368 $ U( LDU, * ), V( LDV, * ), WORK( * )
369* ..
370*
371* =====================================================================
372*
373* .. Local Scalars ..
374 LOGICAL WANTQ, WANTU, WANTV, LQUERY
375 INTEGER I, IBND, ISUB, J, NCYCLE, LWKOPT
376 REAL ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL
377* ..
378* .. External Functions ..
379 LOGICAL LSAME
380 REAL CLANGE, SLAMCH
381 EXTERNAL lsame, clange, slamch
382* ..
383* .. External Subroutines ..
384 EXTERNAL cggsvp3, ctgsja, scopy, xerbla
385* ..
386* .. Intrinsic Functions ..
387 INTRINSIC max, min
388* ..
389* .. Executable Statements ..
390*
391* Decode and test the input parameters
392*
393 wantu = lsame( jobu, 'U' )
394 wantv = lsame( jobv, 'V' )
395 wantq = lsame( jobq, 'Q' )
396 lquery = ( lwork.EQ.-1 )
397 lwkopt = 1
398*
399* Test the input arguments
400*
401 info = 0
402 IF( .NOT.( wantu .OR. lsame( jobu, 'N' ) ) ) THEN
403 info = -1
404 ELSE IF( .NOT.( wantv .OR. lsame( jobv, 'N' ) ) ) THEN
405 info = -2
406 ELSE IF( .NOT.( wantq .OR. lsame( jobq, 'N' ) ) ) THEN
407 info = -3
408 ELSE IF( m.LT.0 ) THEN
409 info = -4
410 ELSE IF( n.LT.0 ) THEN
411 info = -5
412 ELSE IF( p.LT.0 ) THEN
413 info = -6
414 ELSE IF( lda.LT.max( 1, m ) ) THEN
415 info = -10
416 ELSE IF( ldb.LT.max( 1, p ) ) THEN
417 info = -12
418 ELSE IF( ldu.LT.1 .OR. ( wantu .AND. ldu.LT.m ) ) THEN
419 info = -16
420 ELSE IF( ldv.LT.1 .OR. ( wantv .AND. ldv.LT.p ) ) THEN
421 info = -18
422 ELSE IF( ldq.LT.1 .OR. ( wantq .AND. ldq.LT.n ) ) THEN
423 info = -20
424 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
425 info = -24
426 END IF
427*
428* Compute workspace
429*
430 IF( info.EQ.0 ) THEN
431 CALL cggsvp3( jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola,
432 $ tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, rwork,
433 $ work, work, -1, info )
434 lwkopt = n + int( work( 1 ) )
435 lwkopt = max( 2*n, lwkopt )
436 lwkopt = max( 1, lwkopt )
437 work( 1 ) = cmplx( lwkopt )
438 END IF
439*
440 IF( info.NE.0 ) THEN
441 CALL xerbla( 'CGGSVD3', -info )
442 RETURN
443 END IF
444 IF( lquery ) THEN
445 RETURN
446 ENDIF
447*
448* Compute the Frobenius norm of matrices A and B
449*
450 anorm = clange( '1', m, n, a, lda, rwork )
451 bnorm = clange( '1', p, n, b, ldb, rwork )
452*
453* Get machine precision and set up threshold for determining
454* the effective numerical rank of the matrices A and B.
455*
456 ulp = slamch( 'Precision' )
457 unfl = slamch( 'Safe Minimum' )
458 tola = max( m, n )*max( anorm, unfl )*ulp
459 tolb = max( p, n )*max( bnorm, unfl )*ulp
460*
461 CALL cggsvp3( jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola,
462 $ tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, rwork,
463 $ work, work( n+1 ), lwork-n, info )
464*
465* Compute the GSVD of two upper "triangular" matrices
466*
467 CALL ctgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b, ldb,
468 $ tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq,
469 $ work, ncycle, info )
470*
471* Sort the singular values and store the pivot indices in IWORK
472* Copy ALPHA to RWORK, then sort ALPHA in RWORK
473*
474 CALL scopy( n, alpha, 1, rwork, 1 )
475 ibnd = min( l, m-k )
476 DO 20 i = 1, ibnd
477*
478* Scan for largest ALPHA(K+I)
479*
480 isub = i
481 smax = rwork( k+i )
482 DO 10 j = i + 1, ibnd
483 temp = rwork( k+j )
484 IF( temp.GT.smax ) THEN
485 isub = j
486 smax = temp
487 END IF
488 10 CONTINUE
489 IF( isub.NE.i ) THEN
490 rwork( k+isub ) = rwork( k+i )
491 rwork( k+i ) = smax
492 iwork( k+i ) = k + isub
493 ELSE
494 iwork( k+i ) = k + i
495 END IF
496 20 CONTINUE
497*
498 work( 1 ) = cmplx( lwkopt )
499 RETURN
500*
501* End of CGGSVD3
502*
#define alpha
Definition eval.h:35
subroutine cggsvp3(jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola, tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, rwork, tau, work, lwork, info)
CGGSVP3
Definition cggsvp3.f:278
subroutine ctgsja(jobu, jobv, jobq, m, p, n, k, l, a, lda, b, ldb, tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq, work, ncycle, info)
CTGSJA
Definition ctgsja.f:379
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
Definition scopy.f:82