OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches

Functions

subroutine sgejsv (joba, jobu, jobv, jobr, jobt, jobp, m, n, a, lda, sva, u, ldu, v, ldv, work, lwork, iwork, info)
 SGEJSV
subroutine sgesdd (jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, iwork, info)
 SGESDD
subroutine sgesvd (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info)
  SGESVD computes the singular value decomposition (SVD) for GE matrices
subroutine sgesvdq (joba, jobp, jobr, jobu, jobv, m, n, a, lda, s, u, ldu, v, ldv, numrank, iwork, liwork, work, lwork, rwork, lrwork, info)
  SGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method for GE matrices
subroutine sgesvdx (jobu, jobvt, range, m, n, a, lda, vl, vu, il, iu, ns, s, u, ldu, vt, ldvt, work, lwork, iwork, info)
  SGESVDX computes the singular value decomposition (SVD) for GE matrices
subroutine sggsvd3 (jobu, jobv, jobq, m, n, p, k, l, a, lda, b, ldb, alpha, beta, u, ldu, v, ldv, q, ldq, work, lwork, iwork, info)
  SGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices

Detailed Description

This is the group of real singular value driver functions for GE matrices

Function Documentation

◆ sgejsv()

subroutine sgejsv ( character*1 joba,
character*1 jobu,
character*1 jobv,
character*1 jobr,
character*1 jobt,
character*1 jobp,
integer m,
integer n,
real, dimension( lda, * ) a,
integer lda,
real, dimension( n ) sva,
real, dimension( ldu, * ) u,
integer ldu,
real, dimension( ldv, * ) v,
integer ldv,
real, dimension( lwork ) work,
integer lwork,
integer, dimension( * ) iwork,
integer info )

SGEJSV

Download SGEJSV + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> SGEJSV computes the singular value decomposition (SVD) of a real M-by-N
!> matrix [A], where M >= N. The SVD of [A] is written as
!>
!>              [A] = [U] * [SIGMA] * [V]^t,
!>
!> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
!> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and
!> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are
!> the singular values of [A]. The columns of [U] and [V] are the left and
!> the right singular vectors of [A], respectively. The matrices [U] and [V]
!> are computed and stored in the arrays U and V, respectively. The diagonal
!> of [SIGMA] is computed and stored in the array SVA.
!> SGEJSV can sometimes compute tiny singular values and their singular vectors much
!> more accurately than other SVD routines, see below under Further Details.
!> 
Parameters
[in]JOBA
!>          JOBA is CHARACTER*1
!>         Specifies the level of accuracy:
!>       = 'C': This option works well (high relative accuracy) if A = B * D,
!>              with well-conditioned B and arbitrary diagonal matrix D.
!>              The accuracy cannot be spoiled by COLUMN scaling. The
!>              accuracy of the computed output depends on the condition of
!>              B, and the procedure aims at the best theoretical accuracy.
!>              The relative error max_{i=1:N}|d sigma_i| / sigma_i is
!>              bounded by f(M,N)*epsilon* cond(B), independent of D.
!>              The input matrix is preprocessed with the QRF with column
!>              pivoting. This initial preprocessing and preconditioning by
!>              a rank revealing QR factorization is common for all values of
!>              JOBA. Additional actions are specified as follows:
!>       = 'E': Computation as with 'C' with an additional estimate of the
!>              condition number of B. It provides a realistic error bound.
!>       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
!>              D1, D2, and well-conditioned matrix C, this option gives
!>              higher accuracy than the 'C' option. If the structure of the
!>              input matrix is not known, and relative accuracy is
!>              desirable, then this option is advisable. The input matrix A
!>              is preprocessed with QR factorization with FULL (row and
!>              column) pivoting.
!>       = 'G': Computation as with 'F' with an additional estimate of the
!>              condition number of B, where A=D*B. If A has heavily weighted
!>              rows, then using this condition number gives too pessimistic
!>              error bound.
!>       = 'A': Small singular values are the noise and the matrix is treated
!>              as numerically rank deficient. The error in the computed
!>              singular values is bounded by f(m,n)*epsilon*||A||.
!>              The computed SVD A = U * S * V^t restores A up to
!>              f(m,n)*epsilon*||A||.
!>              This gives the procedure the licence to discard (set to zero)
!>              all singular values below N*epsilon*||A||.
!>       = 'R': Similar as in 'A'. Rank revealing property of the initial
!>              QR factorization is used do reveal (using triangular factor)
!>              a gap sigma_{r+1} < epsilon * sigma_r in which case the
!>              numerical RANK is declared to be r. The SVD is computed with
!>              absolute error bounds, but more accurately than with 'A'.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>         Specifies whether to compute the columns of U:
!>       = 'U': N columns of U are returned in the array U.
!>       = 'F': full set of M left sing. vectors is returned in the array U.
!>       = 'W': U may be used as workspace of length M*N. See the description
!>              of U.
!>       = 'N': U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>         Specifies whether to compute the matrix V:
!>       = 'V': N columns of V are returned in the array V; Jacobi rotations
!>              are not explicitly accumulated.
!>       = 'J': N columns of V are returned in the array V, but they are
!>              computed as the product of Jacobi rotations. This option is
!>              allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
!>       = 'W': V may be used as workspace of length N*N. See the description
!>              of V.
!>       = 'N': V is not computed.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>         Specifies the RANGE for the singular values. Issues the licence to
!>         set to zero small positive singular values if they are outside
!>         specified range. If A .NE. 0 is scaled so that the largest singular
!>         value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
!>         the licence to kill columns of A whose norm in c*A is less than
!>         SQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN,
!>         where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
!>       = 'N': Do not kill small columns of c*A. This option assumes that
!>              BLAS and QR factorizations and triangular solvers are
!>              implemented to work in that range. If the condition of A
!>              is greater than BIG, use SGESVJ.
!>       = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
!>              (roughly, as described above). This option is recommended.
!>                                             ===========================
!>         For computing the singular values in the FULL range [SFMIN,BIG]
!>         use SGESVJ.
!> 
[in]JOBT
!>          JOBT is CHARACTER*1
!>         If the matrix is square then the procedure may determine to use
!>         transposed A if A^t seems to be better with respect to convergence.
!>         If the matrix is not square, JOBT is ignored. This is subject to
!>         changes in the future.
!>         The decision is based on two values of entropy over the adjoint
!>         orbit of A^t * A. See the descriptions of WORK(6) and WORK(7).
!>       = 'T': transpose if entropy test indicates possibly faster
!>         convergence of Jacobi process if A^t is taken as input. If A is
!>         replaced with A^t, then the row pivoting is included automatically.
!>       = 'N': do not speculate.
!>         This option can be used to compute only the singular values, or the
!>         full SVD (U, SIGMA and V). For only one set of singular vectors
!>         (U or V), the caller should provide both U and V, as one of the
!>         matrices is used as workspace if the matrix A is transposed.
!>         The implementer can easily remove this constraint and make the
!>         code more complicated. See the descriptions of U and V.
!> 
[in]JOBP
!>          JOBP is CHARACTER*1
!>         Issues the licence to introduce structured perturbations to drown
!>         denormalized numbers. This licence should be active if the
!>         denormals are poorly implemented, causing slow computation,
!>         especially in cases of fast convergence (!). For details see [1,2].
!>         For the sake of simplicity, this perturbations are included only
!>         when the full SVD or only the singular values are requested. The
!>         implementer/user can easily add the perturbation for the cases of
!>         computing one set of singular vectors.
!>       = 'P': introduce perturbation
!>       = 'N': do not perturb
!> 
[in]M
!>          M is INTEGER
!>         The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>         The number of columns of the input matrix A. M >= N >= 0.
!> 
[in,out]A
!>          A is REAL array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]SVA
!>          SVA is REAL array, dimension (N)
!>          On exit,
!>          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
!>            computation SVA contains Euclidean column norms of the
!>            iterated matrices in the array A.
!>          - For WORK(1) .NE. WORK(2): The singular values of A are
!>            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
!>            sigma_max(A) overflows or if small singular values have been
!>            saved from underflow by scaling the input matrix A.
!>          - If JOBR='R' then some of the singular values may be returned
!>            as exact zeros obtained by  because they are
!>            below the numerical rank threshold or are denormalized numbers.
!> 
[out]U
!>          U is REAL array, dimension ( LDU, N )
!>          If JOBU = 'U', then U contains on exit the M-by-N matrix of
!>                         the left singular vectors.
!>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
!>                         the left singular vectors, including an ONB
!>                         of the orthogonal complement of the Range(A).
!>          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N),
!>                         then U is used as workspace if the procedure
!>                         replaces A with A^t. In that case, [V] is computed
!>                         in U as left singular vectors of A^t and then
!>                         copied back to the V array. This 'W' option is just
!>                         a reminder to the caller that in this case U is
!>                         reserved as workspace of length N*N.
!>          If JOBU = 'N'  U is not referenced, unless JOBT='T'.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U,  LDU >= 1.
!>          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
!> 
[out]V
!>          V is REAL array, dimension ( LDV, N )
!>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
!>                         the right singular vectors;
!>          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N),
!>                         then V is used as workspace if the pprocedure
!>                         replaces A with A^t. In that case, [U] is computed
!>                         in V as right singular vectors of A^t and then
!>                         copied back to the U array. This 'W' option is just
!>                         a reminder to the caller that in this case V is
!>                         reserved as workspace of length N*N.
!>          If JOBV = 'N'  V is not referenced, unless JOBT='T'.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V,  LDV >= 1.
!>          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
!> 
[out]WORK
!>          WORK is REAL array, dimension (LWORK)
!>          On exit,
!>          WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such
!>                    that SCALE*SVA(1:N) are the computed singular values
!>                    of A. (See the description of SVA().)
!>          WORK(2) = See the description of WORK(1).
!>          WORK(3) = SCONDA is an estimate for the condition number of
!>                    column equilibrated A. (If JOBA = 'E' or 'G')
!>                    SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1).
!>                    It is computed using SPOCON. It holds
!>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
!>                    where R is the triangular factor from the QRF of A.
!>                    However, if R is truncated and the numerical rank is
!>                    determined to be strictly smaller than N, SCONDA is
!>                    returned as -1, thus indicating that the smallest
!>                    singular values might be lost.
!>
!>          If full SVD is needed, the following two condition numbers are
!>          useful for the analysis of the algorithm. They are provided for
!>          a developer/implementer who is familiar with the details of
!>          the method.
!>
!>          WORK(4) = an estimate of the scaled condition number of the
!>                    triangular factor in the first QR factorization.
!>          WORK(5) = an estimate of the scaled condition number of the
!>                    triangular factor in the second QR factorization.
!>          The following two parameters are computed if JOBT = 'T'.
!>          They are provided for a developer/implementer who is familiar
!>          with the details of the method.
!>
!>          WORK(6) = the entropy of A^t*A :: this is the Shannon entropy
!>                    of diag(A^t*A) / Trace(A^t*A) taken as point in the
!>                    probability simplex.
!>          WORK(7) = the entropy of A*A^t.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          Length of WORK to confirm proper allocation of work space.
!>          LWORK depends on the job:
!>
!>          If only SIGMA is needed ( JOBU = 'N', JOBV = 'N' ) and
!>            -> .. no scaled condition estimate required (JOBE = 'N'):
!>               LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal
!>               block size for DGEQP3 and DGEQRF.
!>               In general, optimal LWORK is computed as
!>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7).
!>            -> .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', 'G'). In this case, LWORK is the maximum
!>               of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7).
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7).
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF),
!>                                                     N+N*N+LWORK(DPOCON),7).
!>
!>          If SIGMA and the right singular vectors are needed (JOBV = 'V'),
!>            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
!>            -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
!>               where NB is the optimal block size for DGEQP3, DGEQRF, DGELQ,
!>               DORMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON),
!>                       N+LWORK(DGELQ), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)).
!>
!>          If SIGMA and the left singular vectors are needed
!>            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
!>               if JOBU = 'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7),
!>               where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON),
!>                        2*N+LWORK(DGEQRF), N+LWORK(DORMQR)).
!>               Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or
!>               M*NB (for JOBU = 'F').
!>
!>          If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and
!>            -> if JOBV = 'V'
!>               the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N).
!>            -> if JOBV = 'J' the minimal requirement is
!>               LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6).
!>            -> For optimal performance, LWORK should be additionally
!>               larger than N+M*NB, where NB is the optimal block size
!>               for DORMQR.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (M+3*N).
!>          On exit,
!>          IWORK(1) = the numerical rank determined after the initial
!>                     QR factorization with pivoting. See the descriptions
!>                     of JOBA and JOBR.
!>          IWORK(2) = the number of the computed nonzero singular values
!>          IWORK(3) = if nonzero, a warning message:
!>                     If IWORK(3) = 1 then some of the column norms of A
!>                     were denormalized floats. The requested high accuracy
!>                     is not warranted by the data.
!> 
[out]INFO
!>          INFO is INTEGER
!>           < 0:  if INFO = -i, then the i-th argument had an illegal value.
!>           = 0:  successful exit;
!>           > 0:  SGEJSV  did not converge in the maximal allowed number
!>                 of sweeps. The computed values may be inaccurate.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  SGEJSV implements a preconditioned Jacobi SVD algorithm. It uses SGEQP3,
!>  SGEQRF, and SGELQF as preprocessors and preconditioners. Optionally, an
!>  additional row pivoting can be used as a preprocessor, which in some
!>  cases results in much higher accuracy. An example is matrix A with the
!>  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
!>  diagonal matrices and C is well-conditioned matrix. In that case, complete
!>  pivoting in the first QR factorizations provides accuracy dependent on the
!>  condition number of C, and independent of D1, D2. Such higher accuracy is
!>  not completely understood theoretically, but it works well in practice.
!>  Further, if A can be written as A = B*D, with well-conditioned B and some
!>  diagonal D, then the high accuracy is guaranteed, both theoretically and
!>  in software, independent of D. For more details see [1], [2].
!>     The computational range for the singular values can be the full range
!>  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
!>  & LAPACK routines called by SGEJSV are implemented to work in that range.
!>  If that is not the case, then the restriction for safe computation with
!>  the singular values in the range of normalized IEEE numbers is that the
!>  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
!>  overflow. This code (SGEJSV) is best used in this restricted range,
!>  meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are
!>  returned as zeros. See JOBR for details on this.
!>     Further, this implementation is somewhat slower than the one described
!>  in [1,2] due to replacement of some non-LAPACK components, and because
!>  the choice of some tuning parameters in the iterative part (SGESVJ) is
!>  left to the implementer on a particular machine.
!>     The rank revealing QR factorization (in this code: SGEQP3) should be
!>  implemented as in [3]. We have a new version of SGEQP3 under development
!>  that is more robust than the current one in LAPACK, with a cleaner cut in
!>  rank deficient cases. It will be available in the SIGMA library [4].
!>  If M is much larger than N, it is obvious that the initial QRF with
!>  column pivoting can be preprocessed by the QRF without pivoting. That
!>  well known trick is not used in SGEJSV because in some cases heavy row
!>  weighting can be treated with complete pivoting. The overhead in cases
!>  M much larger than N is then only due to pivoting, but the benefits in
!>  terms of accuracy have prevailed. The implementer/user can incorporate
!>  this extra QRF step easily. The implementer can also improve data movement
!>  (matrix transpose, matrix copy, matrix transposed copy) - this
!>  implementation of SGEJSV uses only the simplest, naive data movement.
!> 
Contributors:
Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
References:
!>
!> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
!>     LAPACK Working note 169.
!> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
!>     LAPACK Working note 170.
!> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
!>     factorization software - a case study.
!>     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
!>     LAPACK Working note 176.
!> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
!>     QSVD, (H,K)-SVD computations.
!>     Department of Mathematics, University of Zagreb, 2008.
!> 
Bugs, examples and comments:
Please report all bugs and send interesting examples and/or comments to drmac.nosp@m.@mat.nosp@m.h.hr. Thank you.

Definition at line 473 of file sgejsv.f.

476*
477* -- LAPACK computational routine --
478* -- LAPACK is a software package provided by Univ. of Tennessee, --
479* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
480*
481* .. Scalar Arguments ..
482 IMPLICIT NONE
483 INTEGER INFO, LDA, LDU, LDV, LWORK, M, N
484* ..
485* .. Array Arguments ..
486 REAL A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ),
487 $ WORK( LWORK )
488 INTEGER IWORK( * )
489 CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
490* ..
491*
492* ===========================================================================
493*
494* .. Local Parameters ..
495 REAL ZERO, ONE
496 parameter( zero = 0.0e0, one = 1.0e0 )
497* ..
498* .. Local Scalars ..
499 REAL AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK,
500 $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM,
501 $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC
502 INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING
503 LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LSVEC,
504 $ L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN,
505 $ NOSCAL, ROWPIV, RSVEC, TRANSP
506* ..
507* .. Intrinsic Functions ..
508 INTRINSIC abs, alog, max, min, float, nint, sign, sqrt
509* ..
510* .. External Functions ..
511 REAL SLAMCH, SNRM2
512 INTEGER ISAMAX
513 LOGICAL LSAME
514 EXTERNAL isamax, lsame, slamch, snrm2
515* ..
516* .. External Subroutines ..
517 EXTERNAL scopy, sgelqf, sgeqp3, sgeqrf, slacpy, slascl,
520*
521 EXTERNAL sgesvj
522* ..
523*
524* Test the input arguments
525*
526 lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
527 jracc = lsame( jobv, 'J' )
528 rsvec = lsame( jobv, 'V' ) .OR. jracc
529 rowpiv = lsame( joba, 'F' ) .OR. lsame( joba, 'G' )
530 l2rank = lsame( joba, 'R' )
531 l2aber = lsame( joba, 'A' )
532 errest = lsame( joba, 'E' ) .OR. lsame( joba, 'G' )
533 l2tran = lsame( jobt, 'T' )
534 l2kill = lsame( jobr, 'R' )
535 defr = lsame( jobr, 'N' )
536 l2pert = lsame( jobp, 'P' )
537*
538 IF ( .NOT.(rowpiv .OR. l2rank .OR. l2aber .OR.
539 $ errest .OR. lsame( joba, 'C' ) )) THEN
540 info = - 1
541 ELSE IF ( .NOT.( lsvec .OR. lsame( jobu, 'N' ) .OR.
542 $ lsame( jobu, 'W' )) ) THEN
543 info = - 2
544 ELSE IF ( .NOT.( rsvec .OR. lsame( jobv, 'N' ) .OR.
545 $ lsame( jobv, 'W' )) .OR. ( jracc .AND. (.NOT.lsvec) ) ) THEN
546 info = - 3
547 ELSE IF ( .NOT. ( l2kill .OR. defr ) ) THEN
548 info = - 4
549 ELSE IF ( .NOT. ( l2tran .OR. lsame( jobt, 'N' ) ) ) THEN
550 info = - 5
551 ELSE IF ( .NOT. ( l2pert .OR. lsame( jobp, 'N' ) ) ) THEN
552 info = - 6
553 ELSE IF ( m .LT. 0 ) THEN
554 info = - 7
555 ELSE IF ( ( n .LT. 0 ) .OR. ( n .GT. m ) ) THEN
556 info = - 8
557 ELSE IF ( lda .LT. m ) THEN
558 info = - 10
559 ELSE IF ( lsvec .AND. ( ldu .LT. m ) ) THEN
560 info = - 13
561 ELSE IF ( rsvec .AND. ( ldv .LT. n ) ) THEN
562 info = - 15
563 ELSE IF ( (.NOT.(lsvec .OR. rsvec .OR. errest).AND.
564 $ (lwork .LT. max(7,4*n+1,2*m+n))) .OR.
565 $ (.NOT.(lsvec .OR. rsvec) .AND. errest .AND.
566 $ (lwork .LT. max(7,4*n+n*n,2*m+n))) .OR.
567 $ (lsvec .AND. (.NOT.rsvec) .AND. (lwork .LT. max(7,2*m+n,4*n+1)))
568 $ .OR.
569 $ (rsvec .AND. (.NOT.lsvec) .AND. (lwork .LT. max(7,2*m+n,4*n+1)))
570 $ .OR.
571 $ (lsvec .AND. rsvec .AND. (.NOT.jracc) .AND.
572 $ (lwork.LT.max(2*m+n,6*n+2*n*n)))
573 $ .OR. (lsvec .AND. rsvec .AND. jracc .AND.
574 $ lwork.LT.max(2*m+n,4*n+n*n,2*n+n*n+6)))
575 $ THEN
576 info = - 17
577 ELSE
578* #:)
579 info = 0
580 END IF
581*
582 IF ( info .NE. 0 ) THEN
583* #:(
584 CALL xerbla( 'SGEJSV', - info )
585 RETURN
586 END IF
587*
588* Quick return for void matrix (Y3K safe)
589* #:)
590 IF ( ( m .EQ. 0 ) .OR. ( n .EQ. 0 ) ) THEN
591 iwork(1:3) = 0
592 work(1:7) = 0
593 RETURN
594 ENDIF
595*
596* Determine whether the matrix U should be M x N or M x M
597*
598 IF ( lsvec ) THEN
599 n1 = n
600 IF ( lsame( jobu, 'F' ) ) n1 = m
601 END IF
602*
603* Set numerical parameters
604*
605*! NOTE: Make sure SLAMCH() does not fail on the target architecture.
606*
607 epsln = slamch('Epsilon')
608 sfmin = slamch('SafeMinimum')
609 small = sfmin / epsln
610 big = slamch('O')
611* BIG = ONE / SFMIN
612*
613* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
614*
615*(!) If necessary, scale SVA() to protect the largest norm from
616* overflow. It is possible that this scaling pushes the smallest
617* column norm left from the underflow threshold (extreme case).
618*
619 scalem = one / sqrt(float(m)*float(n))
620 noscal = .true.
621 goscal = .true.
622 DO 1874 p = 1, n
623 aapp = zero
624 aaqq = one
625 CALL slassq( m, a(1,p), 1, aapp, aaqq )
626 IF ( aapp .GT. big ) THEN
627 info = - 9
628 CALL xerbla( 'SGEJSV', -info )
629 RETURN
630 END IF
631 aaqq = sqrt(aaqq)
632 IF ( ( aapp .LT. (big / aaqq) ) .AND. noscal ) THEN
633 sva(p) = aapp * aaqq
634 ELSE
635 noscal = .false.
636 sva(p) = aapp * ( aaqq * scalem )
637 IF ( goscal ) THEN
638 goscal = .false.
639 CALL sscal( p-1, scalem, sva, 1 )
640 END IF
641 END IF
642 1874 CONTINUE
643*
644 IF ( noscal ) scalem = one
645*
646 aapp = zero
647 aaqq = big
648 DO 4781 p = 1, n
649 aapp = max( aapp, sva(p) )
650 IF ( sva(p) .NE. zero ) aaqq = min( aaqq, sva(p) )
651 4781 CONTINUE
652*
653* Quick return for zero M x N matrix
654* #:)
655 IF ( aapp .EQ. zero ) THEN
656 IF ( lsvec ) CALL slaset( 'G', m, n1, zero, one, u, ldu )
657 IF ( rsvec ) CALL slaset( 'G', n, n, zero, one, v, ldv )
658 work(1) = one
659 work(2) = one
660 IF ( errest ) work(3) = one
661 IF ( lsvec .AND. rsvec ) THEN
662 work(4) = one
663 work(5) = one
664 END IF
665 IF ( l2tran ) THEN
666 work(6) = zero
667 work(7) = zero
668 END IF
669 iwork(1) = 0
670 iwork(2) = 0
671 iwork(3) = 0
672 RETURN
673 END IF
674*
675* Issue warning if denormalized column norms detected. Override the
676* high relative accuracy request. Issue licence to kill columns
677* (set them to zero) whose norm is less than sigma_max / BIG (roughly).
678* #:(
679 warning = 0
680 IF ( aaqq .LE. sfmin ) THEN
681 l2rank = .true.
682 l2kill = .true.
683 warning = 1
684 END IF
685*
686* Quick return for one-column matrix
687* #:)
688 IF ( n .EQ. 1 ) THEN
689*
690 IF ( lsvec ) THEN
691 CALL slascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr )
692 CALL slacpy( 'A', m, 1, a, lda, u, ldu )
693* computing all M left singular vectors of the M x 1 matrix
694 IF ( n1 .NE. n ) THEN
695 CALL sgeqrf( m, n, u,ldu, work, work(n+1),lwork-n,ierr )
696 CALL sorgqr( m,n1,1, u,ldu,work,work(n+1),lwork-n,ierr )
697 CALL scopy( m, a(1,1), 1, u(1,1), 1 )
698 END IF
699 END IF
700 IF ( rsvec ) THEN
701 v(1,1) = one
702 END IF
703 IF ( sva(1) .LT. (big*scalem) ) THEN
704 sva(1) = sva(1) / scalem
705 scalem = one
706 END IF
707 work(1) = one / scalem
708 work(2) = one
709 IF ( sva(1) .NE. zero ) THEN
710 iwork(1) = 1
711 IF ( ( sva(1) / scalem) .GE. sfmin ) THEN
712 iwork(2) = 1
713 ELSE
714 iwork(2) = 0
715 END IF
716 ELSE
717 iwork(1) = 0
718 iwork(2) = 0
719 END IF
720 iwork(3) = 0
721 IF ( errest ) work(3) = one
722 IF ( lsvec .AND. rsvec ) THEN
723 work(4) = one
724 work(5) = one
725 END IF
726 IF ( l2tran ) THEN
727 work(6) = zero
728 work(7) = zero
729 END IF
730 RETURN
731*
732 END IF
733*
734 transp = .false.
735 l2tran = l2tran .AND. ( m .EQ. n )
736*
737 aatmax = -one
738 aatmin = big
739 IF ( rowpiv .OR. l2tran ) THEN
740*
741* Compute the row norms, needed to determine row pivoting sequence
742* (in the case of heavily row weighted A, row pivoting is strongly
743* advised) and to collect information needed to compare the
744* structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.).
745*
746 IF ( l2tran ) THEN
747 DO 1950 p = 1, m
748 xsc = zero
749 temp1 = one
750 CALL slassq( n, a(p,1), lda, xsc, temp1 )
751* SLASSQ gets both the ell_2 and the ell_infinity norm
752* in one pass through the vector
753 work(m+n+p) = xsc * scalem
754 work(n+p) = xsc * (scalem*sqrt(temp1))
755 aatmax = max( aatmax, work(n+p) )
756 IF (work(n+p) .NE. zero) aatmin = min(aatmin,work(n+p))
757 1950 CONTINUE
758 ELSE
759 DO 1904 p = 1, m
760 work(m+n+p) = scalem*abs( a(p,isamax(n,a(p,1),lda)) )
761 aatmax = max( aatmax, work(m+n+p) )
762 aatmin = min( aatmin, work(m+n+p) )
763 1904 CONTINUE
764 END IF
765*
766 END IF
767*
768* For square matrix A try to determine whether A^t would be better
769* input for the preconditioned Jacobi SVD, with faster convergence.
770* The decision is based on an O(N) function of the vector of column
771* and row norms of A, based on the Shannon entropy. This should give
772* the right choice in most cases when the difference actually matters.
773* It may fail and pick the slower converging side.
774*
775 entra = zero
776 entrat = zero
777 IF ( l2tran ) THEN
778*
779 xsc = zero
780 temp1 = one
781 CALL slassq( n, sva, 1, xsc, temp1 )
782 temp1 = one / temp1
783*
784 entra = zero
785 DO 1113 p = 1, n
786 big1 = ( ( sva(p) / xsc )**2 ) * temp1
787 IF ( big1 .NE. zero ) entra = entra + big1 * alog(big1)
788 1113 CONTINUE
789 entra = - entra / alog(float(n))
790*
791* Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex.
792* It is derived from the diagonal of A^t * A. Do the same with the
793* diagonal of A * A^t, compute the entropy of the corresponding
794* probability distribution. Note that A * A^t and A^t * A have the
795* same trace.
796*
797 entrat = zero
798 DO 1114 p = n+1, n+m
799 big1 = ( ( work(p) / xsc )**2 ) * temp1
800 IF ( big1 .NE. zero ) entrat = entrat + big1 * alog(big1)
801 1114 CONTINUE
802 entrat = - entrat / alog(float(m))
803*
804* Analyze the entropies and decide A or A^t. Smaller entropy
805* usually means better input for the algorithm.
806*
807 transp = ( entrat .LT. entra )
808*
809* If A^t is better than A, transpose A.
810*
811 IF ( transp ) THEN
812* In an optimal implementation, this trivial transpose
813* should be replaced with faster transpose.
814 DO 1115 p = 1, n - 1
815 DO 1116 q = p + 1, n
816 temp1 = a(q,p)
817 a(q,p) = a(p,q)
818 a(p,q) = temp1
819 1116 CONTINUE
820 1115 CONTINUE
821 DO 1117 p = 1, n
822 work(m+n+p) = sva(p)
823 sva(p) = work(n+p)
824 1117 CONTINUE
825 temp1 = aapp
826 aapp = aatmax
827 aatmax = temp1
828 temp1 = aaqq
829 aaqq = aatmin
830 aatmin = temp1
831 kill = lsvec
832 lsvec = rsvec
833 rsvec = kill
834 IF ( lsvec ) n1 = n
835*
836 rowpiv = .true.
837 END IF
838*
839 END IF
840* END IF L2TRAN
841*
842* Scale the matrix so that its maximal singular value remains less
843* than SQRT(BIG) -- the matrix is scaled so that its maximal column
844* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
845* SQRT(BIG) instead of BIG is the fact that SGEJSV uses LAPACK and
846* BLAS routines that, in some implementations, are not capable of
847* working in the full interval [SFMIN,BIG] and that they may provoke
848* overflows in the intermediate results. If the singular values spread
849* from SFMIN to BIG, then SGESVJ will compute them. So, in that case,
850* one should use SGESVJ instead of SGEJSV.
851*
852 big1 = sqrt( big )
853 temp1 = sqrt( big / float(n) )
854*
855 CALL slascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr )
856 IF ( aaqq .GT. (aapp * sfmin) ) THEN
857 aaqq = ( aaqq / aapp ) * temp1
858 ELSE
859 aaqq = ( aaqq * temp1 ) / aapp
860 END IF
861 temp1 = temp1 * scalem
862 CALL slascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr )
863*
864* To undo scaling at the end of this procedure, multiply the
865* computed singular values with USCAL2 / USCAL1.
866*
867 uscal1 = temp1
868 uscal2 = aapp
869*
870 IF ( l2kill ) THEN
871* L2KILL enforces computation of nonzero singular values in
872* the restricted range of condition number of the initial A,
873* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
874 xsc = sqrt( sfmin )
875 ELSE
876 xsc = small
877*
878* Now, if the condition number of A is too big,
879* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
880* as a precaution measure, the full SVD is computed using SGESVJ
881* with accumulated Jacobi rotations. This provides numerically
882* more robust computation, at the cost of slightly increased run
883* time. Depending on the concrete implementation of BLAS and LAPACK
884* (i.e. how they behave in presence of extreme ill-conditioning) the
885* implementor may decide to remove this switch.
886 IF ( ( aaqq.LT.sqrt(sfmin) ) .AND. lsvec .AND. rsvec ) THEN
887 jracc = .true.
888 END IF
889*
890 END IF
891 IF ( aaqq .LT. xsc ) THEN
892 DO 700 p = 1, n
893 IF ( sva(p) .LT. xsc ) THEN
894 CALL slaset( 'A', m, 1, zero, zero, a(1,p), lda )
895 sva(p) = zero
896 END IF
897 700 CONTINUE
898 END IF
899*
900* Preconditioning using QR factorization with pivoting
901*
902 IF ( rowpiv ) THEN
903* Optional row permutation (Bjoerck row pivoting):
904* A result by Cox and Higham shows that the Bjoerck's
905* row pivoting combined with standard column pivoting
906* has similar effect as Powell-Reid complete pivoting.
907* The ell-infinity norms of A are made nonincreasing.
908 DO 1952 p = 1, m - 1
909 q = isamax( m-p+1, work(m+n+p), 1 ) + p - 1
910 iwork(2*n+p) = q
911 IF ( p .NE. q ) THEN
912 temp1 = work(m+n+p)
913 work(m+n+p) = work(m+n+q)
914 work(m+n+q) = temp1
915 END IF
916 1952 CONTINUE
917 CALL slaswp( n, a, lda, 1, m-1, iwork(2*n+1), 1 )
918 END IF
919*
920* End of the preparation phase (scaling, optional sorting and
921* transposing, optional flushing of small columns).
922*
923* Preconditioning
924*
925* If the full SVD is needed, the right singular vectors are computed
926* from a matrix equation, and for that we need theoretical analysis
927* of the Businger-Golub pivoting. So we use SGEQP3 as the first RR QRF.
928* In all other cases the first RR QRF can be chosen by other criteria
929* (eg speed by replacing global with restricted window pivoting, such
930* as in SGEQPX from TOMS # 782). Good results will be obtained using
931* SGEQPX with properly (!) chosen numerical parameters.
932* Any improvement of SGEQP3 improves overall performance of SGEJSV.
933*
934* A * P1 = Q1 * [ R1^t 0]^t:
935 DO 1963 p = 1, n
936* .. all columns are free columns
937 iwork(p) = 0
938 1963 CONTINUE
939 CALL sgeqp3( m,n,a,lda, iwork,work, work(n+1),lwork-n, ierr )
940*
941* The upper triangular matrix R1 from the first QRF is inspected for
942* rank deficiency and possibilities for deflation, or possible
943* ill-conditioning. Depending on the user specified flag L2RANK,
944* the procedure explores possibilities to reduce the numerical
945* rank by inspecting the computed upper triangular factor. If
946* L2RANK or L2ABER are up, then SGEJSV will compute the SVD of
947* A + dA, where ||dA|| <= f(M,N)*EPSLN.
948*
949 nr = 1
950 IF ( l2aber ) THEN
951* Standard absolute error bound suffices. All sigma_i with
952* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
953* aggressive enforcement of lower numerical rank by introducing a
954* backward error of the order of N*EPSLN*||A||.
955 temp1 = sqrt(float(n))*epsln
956 DO 3001 p = 2, n
957 IF ( abs(a(p,p)) .GE. (temp1*abs(a(1,1))) ) THEN
958 nr = nr + 1
959 ELSE
960 GO TO 3002
961 END IF
962 3001 CONTINUE
963 3002 CONTINUE
964 ELSE IF ( l2rank ) THEN
965* .. similarly as above, only slightly more gentle (less aggressive).
966* Sudden drop on the diagonal of R1 is used as the criterion for
967* close-to-rank-deficient.
968 temp1 = sqrt(sfmin)
969 DO 3401 p = 2, n
970 IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
971 $ ( abs(a(p,p)) .LT. small ) .OR.
972 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3402
973 nr = nr + 1
974 3401 CONTINUE
975 3402 CONTINUE
976*
977 ELSE
978* The goal is high relative accuracy. However, if the matrix
979* has high scaled condition number the relative accuracy is in
980* general not feasible. Later on, a condition number estimator
981* will be deployed to estimate the scaled condition number.
982* Here we just remove the underflowed part of the triangular
983* factor. This prevents the situation in which the code is
984* working hard to get the accuracy not warranted by the data.
985 temp1 = sqrt(sfmin)
986 DO 3301 p = 2, n
987 IF ( ( abs(a(p,p)) .LT. small ) .OR.
988 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3302
989 nr = nr + 1
990 3301 CONTINUE
991 3302 CONTINUE
992*
993 END IF
994*
995 almort = .false.
996 IF ( nr .EQ. n ) THEN
997 maxprj = one
998 DO 3051 p = 2, n
999 temp1 = abs(a(p,p)) / sva(iwork(p))
1000 maxprj = min( maxprj, temp1 )
1001 3051 CONTINUE
1002 IF ( maxprj**2 .GE. one - float(n)*epsln ) almort = .true.
1003 END IF
1004*
1005*
1006 sconda = - one
1007 condr1 = - one
1008 condr2 = - one
1009*
1010 IF ( errest ) THEN
1011 IF ( n .EQ. nr ) THEN
1012 IF ( rsvec ) THEN
1013* .. V is available as workspace
1014 CALL slacpy( 'U', n, n, a, lda, v, ldv )
1015 DO 3053 p = 1, n
1016 temp1 = sva(iwork(p))
1017 CALL sscal( p, one/temp1, v(1,p), 1 )
1018 3053 CONTINUE
1019 CALL spocon( 'U', n, v, ldv, one, temp1,
1020 $ work(n+1), iwork(2*n+m+1), ierr )
1021 ELSE IF ( lsvec ) THEN
1022* .. U is available as workspace
1023 CALL slacpy( 'U', n, n, a, lda, u, ldu )
1024 DO 3054 p = 1, n
1025 temp1 = sva(iwork(p))
1026 CALL sscal( p, one/temp1, u(1,p), 1 )
1027 3054 CONTINUE
1028 CALL spocon( 'U', n, u, ldu, one, temp1,
1029 $ work(n+1), iwork(2*n+m+1), ierr )
1030 ELSE
1031 CALL slacpy( 'U', n, n, a, lda, work(n+1), n )
1032 DO 3052 p = 1, n
1033 temp1 = sva(iwork(p))
1034 CALL sscal( p, one/temp1, work(n+(p-1)*n+1), 1 )
1035 3052 CONTINUE
1036* .. the columns of R are scaled to have unit Euclidean lengths.
1037 CALL spocon( 'U', n, work(n+1), n, one, temp1,
1038 $ work(n+n*n+1), iwork(2*n+m+1), ierr )
1039 END IF
1040 sconda = one / sqrt(temp1)
1041* SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1).
1042* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
1043 ELSE
1044 sconda = - one
1045 END IF
1046 END IF
1047*
1048 l2pert = l2pert .AND. ( abs( a(1,1)/a(nr,nr) ) .GT. sqrt(big1) )
1049* If there is no violent scaling, artificial perturbation is not needed.
1050*
1051* Phase 3:
1052*
1053 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
1054*
1055* Singular Values only
1056*
1057* .. transpose A(1:NR,1:N)
1058 DO 1946 p = 1, min( n-1, nr )
1059 CALL scopy( n-p, a(p,p+1), lda, a(p+1,p), 1 )
1060 1946 CONTINUE
1061*
1062* The following two DO-loops introduce small relative perturbation
1063* into the strict upper triangle of the lower triangular matrix.
1064* Small entries below the main diagonal are also changed.
1065* This modification is useful if the computing environment does not
1066* provide/allow FLUSH TO ZERO underflow, for it prevents many
1067* annoying denormalized numbers in case of strongly scaled matrices.
1068* The perturbation is structured so that it does not introduce any
1069* new perturbation of the singular values, and it does not destroy
1070* the job done by the preconditioner.
1071* The licence for this perturbation is in the variable L2PERT, which
1072* should be .FALSE. if FLUSH TO ZERO underflow is active.
1073*
1074 IF ( .NOT. almort ) THEN
1075*
1076 IF ( l2pert ) THEN
1077* XSC = SQRT(SMALL)
1078 xsc = epsln / float(n)
1079 DO 4947 q = 1, nr
1080 temp1 = xsc*abs(a(q,q))
1081 DO 4949 p = 1, n
1082 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1083 $ .OR. ( p .LT. q ) )
1084 $ a(p,q) = sign( temp1, a(p,q) )
1085 4949 CONTINUE
1086 4947 CONTINUE
1087 ELSE
1088 CALL slaset( 'U', nr-1,nr-1, zero,zero, a(1,2),lda )
1089 END IF
1090*
1091* .. second preconditioning using the QR factorization
1092*
1093 CALL sgeqrf( n,nr, a,lda, work, work(n+1),lwork-n, ierr )
1094*
1095* .. and transpose upper to lower triangular
1096 DO 1948 p = 1, nr - 1
1097 CALL scopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 )
1098 1948 CONTINUE
1099*
1100 END IF
1101*
1102* Row-cyclic Jacobi SVD algorithm with column pivoting
1103*
1104* .. again some perturbation (a "background noise") is added
1105* to drown denormals
1106 IF ( l2pert ) THEN
1107* XSC = SQRT(SMALL)
1108 xsc = epsln / float(n)
1109 DO 1947 q = 1, nr
1110 temp1 = xsc*abs(a(q,q))
1111 DO 1949 p = 1, nr
1112 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1113 $ .OR. ( p .LT. q ) )
1114 $ a(p,q) = sign( temp1, a(p,q) )
1115 1949 CONTINUE
1116 1947 CONTINUE
1117 ELSE
1118 CALL slaset( 'U', nr-1, nr-1, zero, zero, a(1,2), lda )
1119 END IF
1120*
1121* .. and one-sided Jacobi rotations are started on a lower
1122* triangular matrix (plus perturbation which is ignored in
1123* the part which destroys triangular form (confusing?!))
1124*
1125 CALL sgesvj( 'L', 'NoU', 'NoV', nr, nr, a, lda, sva,
1126 $ n, v, ldv, work, lwork, info )
1127*
1128 scalem = work(1)
1129 numrank = nint(work(2))
1130*
1131*
1132 ELSE IF ( rsvec .AND. ( .NOT. lsvec ) ) THEN
1133*
1134* -> Singular Values and Right Singular Vectors <-
1135*
1136 IF ( almort ) THEN
1137*
1138* .. in this case NR equals N
1139 DO 1998 p = 1, nr
1140 CALL scopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1141 1998 CONTINUE
1142 CALL slaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1143*
1144 CALL sgesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,
1145 $ work, lwork, info )
1146 scalem = work(1)
1147 numrank = nint(work(2))
1148
1149 ELSE
1150*
1151* .. two more QR factorizations ( one QRF is not enough, two require
1152* accumulated product of Jacobi rotations, three are perfect )
1153*
1154 CALL slaset( 'Lower', nr-1, nr-1, zero, zero, a(2,1), lda )
1155 CALL sgelqf( nr, n, a, lda, work, work(n+1), lwork-n, ierr)
1156 CALL slacpy( 'Lower', nr, nr, a, lda, v, ldv )
1157 CALL slaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1158 CALL sgeqrf( nr, nr, v, ldv, work(n+1), work(2*n+1),
1159 $ lwork-2*n, ierr )
1160 DO 8998 p = 1, nr
1161 CALL scopy( nr-p+1, v(p,p), ldv, v(p,p), 1 )
1162 8998 CONTINUE
1163 CALL slaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1164*
1165 CALL sgesvj( 'Lower', 'U','N', nr, nr, v,ldv, sva, nr, u,
1166 $ ldu, work(n+1), lwork-n, info )
1167 scalem = work(n+1)
1168 numrank = nint(work(n+2))
1169 IF ( nr .LT. n ) THEN
1170 CALL slaset( 'A',n-nr, nr, zero,zero, v(nr+1,1), ldv )
1171 CALL slaset( 'A',nr, n-nr, zero,zero, v(1,nr+1), ldv )
1172 CALL slaset( 'A',n-nr,n-nr,zero,one, v(nr+1,nr+1), ldv )
1173 END IF
1174*
1175 CALL sormlq( 'Left', 'Transpose', n, n, nr, a, lda, work,
1176 $ v, ldv, work(n+1), lwork-n, ierr )
1177*
1178 END IF
1179*
1180 DO 8991 p = 1, n
1181 CALL scopy( n, v(p,1), ldv, a(iwork(p),1), lda )
1182 8991 CONTINUE
1183 CALL slacpy( 'All', n, n, a, lda, v, ldv )
1184*
1185 IF ( transp ) THEN
1186 CALL slacpy( 'All', n, n, v, ldv, u, ldu )
1187 END IF
1188*
1189 ELSE IF ( lsvec .AND. ( .NOT. rsvec ) ) THEN
1190*
1191* .. Singular Values and Left Singular Vectors ..
1192*
1193* .. second preconditioning step to avoid need to accumulate
1194* Jacobi rotations in the Jacobi iterations.
1195 DO 1965 p = 1, nr
1196 CALL scopy( n-p+1, a(p,p), lda, u(p,p), 1 )
1197 1965 CONTINUE
1198 CALL slaset( 'Upper', nr-1, nr-1, zero, zero, u(1,2), ldu )
1199*
1200 CALL sgeqrf( n, nr, u, ldu, work(n+1), work(2*n+1),
1201 $ lwork-2*n, ierr )
1202*
1203 DO 1967 p = 1, nr - 1
1204 CALL scopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 )
1205 1967 CONTINUE
1206 CALL slaset( 'Upper', nr-1, nr-1, zero, zero, u(1,2), ldu )
1207*
1208 CALL sgesvj( 'Lower', 'U', 'N', nr,nr, u, ldu, sva, nr, a,
1209 $ lda, work(n+1), lwork-n, info )
1210 scalem = work(n+1)
1211 numrank = nint(work(n+2))
1212*
1213 IF ( nr .LT. m ) THEN
1214 CALL slaset( 'A', m-nr, nr,zero, zero, u(nr+1,1), ldu )
1215 IF ( nr .LT. n1 ) THEN
1216 CALL slaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1), ldu )
1217 CALL slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu )
1218 END IF
1219 END IF
1220*
1221 CALL sormqr( 'Left', 'No Tr', m, n1, n, a, lda, work, u,
1222 $ ldu, work(n+1), lwork-n, ierr )
1223*
1224 IF ( rowpiv )
1225 $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1226*
1227 DO 1974 p = 1, n1
1228 xsc = one / snrm2( m, u(1,p), 1 )
1229 CALL sscal( m, xsc, u(1,p), 1 )
1230 1974 CONTINUE
1231*
1232 IF ( transp ) THEN
1233 CALL slacpy( 'All', n, n, u, ldu, v, ldv )
1234 END IF
1235*
1236 ELSE
1237*
1238* .. Full SVD ..
1239*
1240 IF ( .NOT. jracc ) THEN
1241*
1242 IF ( .NOT. almort ) THEN
1243*
1244* Second Preconditioning Step (QRF [with pivoting])
1245* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
1246* equivalent to an LQF CALL. Since in many libraries the QRF
1247* seems to be better optimized than the LQF, we do explicit
1248* transpose and use the QRF. This is subject to changes in an
1249* optimized implementation of SGEJSV.
1250*
1251 DO 1968 p = 1, nr
1252 CALL scopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1253 1968 CONTINUE
1254*
1255* .. the following two loops perturb small entries to avoid
1256* denormals in the second QR factorization, where they are
1257* as good as zeros. This is done to avoid painfully slow
1258* computation with denormals. The relative size of the perturbation
1259* is a parameter that can be changed by the implementer.
1260* This perturbation device will be obsolete on machines with
1261* properly implemented arithmetic.
1262* To switch it off, set L2PERT=.FALSE. To remove it from the
1263* code, remove the action under L2PERT=.TRUE., leave the ELSE part.
1264* The following two loops should be blocked and fused with the
1265* transposed copy above.
1266*
1267 IF ( l2pert ) THEN
1268 xsc = sqrt(small)
1269 DO 2969 q = 1, nr
1270 temp1 = xsc*abs( v(q,q) )
1271 DO 2968 p = 1, n
1272 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1273 $ .OR. ( p .LT. q ) )
1274 $ v(p,q) = sign( temp1, v(p,q) )
1275 IF ( p .LT. q ) v(p,q) = - v(p,q)
1276 2968 CONTINUE
1277 2969 CONTINUE
1278 ELSE
1279 CALL slaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv )
1280 END IF
1281*
1282* Estimate the row scaled condition number of R1
1283* (If R1 is rectangular, N > NR, then the condition number
1284* of the leading NR x NR submatrix is estimated.)
1285*
1286 CALL slacpy( 'L', nr, nr, v, ldv, work(2*n+1), nr )
1287 DO 3950 p = 1, nr
1288 temp1 = snrm2(nr-p+1,work(2*n+(p-1)*nr+p),1)
1289 CALL sscal(nr-p+1,one/temp1,work(2*n+(p-1)*nr+p),1)
1290 3950 CONTINUE
1291 CALL spocon('Lower',nr,work(2*n+1),nr,one,temp1,
1292 $ work(2*n+nr*nr+1),iwork(m+2*n+1),ierr)
1293 condr1 = one / sqrt(temp1)
1294* .. here need a second opinion on the condition number
1295* .. then assume worst case scenario
1296* R1 is OK for inverse <=> CONDR1 .LT. FLOAT(N)
1297* more conservative <=> CONDR1 .LT. SQRT(FLOAT(N))
1298*
1299 cond_ok = sqrt(float(nr))
1300*[TP] COND_OK is a tuning parameter.
1301
1302 IF ( condr1 .LT. cond_ok ) THEN
1303* .. the second QRF without pivoting. Note: in an optimized
1304* implementation, this QRF should be implemented as the QRF
1305* of a lower triangular matrix.
1306* R1^t = Q2 * R2
1307 CALL sgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),
1308 $ lwork-2*n, ierr )
1309*
1310 IF ( l2pert ) THEN
1311 xsc = sqrt(small)/epsln
1312 DO 3959 p = 2, nr
1313 DO 3958 q = 1, p - 1
1314 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q)))
1315 IF ( abs(v(q,p)) .LE. temp1 )
1316 $ v(q,p) = sign( temp1, v(q,p) )
1317 3958 CONTINUE
1318 3959 CONTINUE
1319 END IF
1320*
1321 IF ( nr .NE. n )
1322 $ CALL slacpy( 'A', n, nr, v, ldv, work(2*n+1), n )
1323* .. save ...
1324*
1325* .. this transposed copy should be better than naive
1326 DO 1969 p = 1, nr - 1
1327 CALL scopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 )
1328 1969 CONTINUE
1329*
1330 condr2 = condr1
1331*
1332 ELSE
1333*
1334* .. ill-conditioned case: second QRF with pivoting
1335* Note that windowed pivoting would be equally good
1336* numerically, and more run-time efficient. So, in
1337* an optimal implementation, the next call to SGEQP3
1338* should be replaced with eg. CALL SGEQPX (ACM TOMS #782)
1339* with properly (carefully) chosen parameters.
1340*
1341* R1^t * P2 = Q2 * R2
1342 DO 3003 p = 1, nr
1343 iwork(n+p) = 0
1344 3003 CONTINUE
1345 CALL sgeqp3( n, nr, v, ldv, iwork(n+1), work(n+1),
1346 $ work(2*n+1), lwork-2*n, ierr )
1347** CALL SGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
1348** $ LWORK-2*N, IERR )
1349 IF ( l2pert ) THEN
1350 xsc = sqrt(small)
1351 DO 3969 p = 2, nr
1352 DO 3968 q = 1, p - 1
1353 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q)))
1354 IF ( abs(v(q,p)) .LE. temp1 )
1355 $ v(q,p) = sign( temp1, v(q,p) )
1356 3968 CONTINUE
1357 3969 CONTINUE
1358 END IF
1359*
1360 CALL slacpy( 'A', n, nr, v, ldv, work(2*n+1), n )
1361*
1362 IF ( l2pert ) THEN
1363 xsc = sqrt(small)
1364 DO 8970 p = 2, nr
1365 DO 8971 q = 1, p - 1
1366 temp1 = xsc * min(abs(v(p,p)),abs(v(q,q)))
1367 v(p,q) = - sign( temp1, v(q,p) )
1368 8971 CONTINUE
1369 8970 CONTINUE
1370 ELSE
1371 CALL slaset( 'L',nr-1,nr-1,zero,zero,v(2,1),ldv )
1372 END IF
1373* Now, compute R2 = L3 * Q3, the LQ factorization.
1374 CALL sgelqf( nr, nr, v, ldv, work(2*n+n*nr+1),
1375 $ work(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, ierr )
1376* .. and estimate the condition number
1377 CALL slacpy( 'L',nr,nr,v,ldv,work(2*n+n*nr+nr+1),nr )
1378 DO 4950 p = 1, nr
1379 temp1 = snrm2( p, work(2*n+n*nr+nr+p), nr )
1380 CALL sscal( p, one/temp1, work(2*n+n*nr+nr+p), nr )
1381 4950 CONTINUE
1382 CALL spocon( 'L',nr,work(2*n+n*nr+nr+1),nr,one,temp1,
1383 $ work(2*n+n*nr+nr+nr*nr+1),iwork(m+2*n+1),ierr )
1384 condr2 = one / sqrt(temp1)
1385*
1386 IF ( condr2 .GE. cond_ok ) THEN
1387* .. save the Householder vectors used for Q3
1388* (this overwrites the copy of R2, as it will not be
1389* needed in this branch, but it does not overwritte the
1390* Huseholder vectors of Q2.).
1391 CALL slacpy( 'U', nr, nr, v, ldv, work(2*n+1), n )
1392* .. and the rest of the information on Q3 is in
1393* WORK(2*N+N*NR+1:2*N+N*NR+N)
1394 END IF
1395*
1396 END IF
1397*
1398 IF ( l2pert ) THEN
1399 xsc = sqrt(small)
1400 DO 4968 q = 2, nr
1401 temp1 = xsc * v(q,q)
1402 DO 4969 p = 1, q - 1
1403* V(p,q) = - SIGN( TEMP1, V(q,p) )
1404 v(p,q) = - sign( temp1, v(p,q) )
1405 4969 CONTINUE
1406 4968 CONTINUE
1407 ELSE
1408 CALL slaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv )
1409 END IF
1410*
1411* Second preconditioning finished; continue with Jacobi SVD
1412* The input matrix is lower trinagular.
1413*
1414* Recover the right singular vectors as solution of a well
1415* conditioned triangular matrix equation.
1416*
1417 IF ( condr1 .LT. cond_ok ) THEN
1418*
1419 CALL sgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u,
1420 $ ldu,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,info )
1421 scalem = work(2*n+n*nr+nr+1)
1422 numrank = nint(work(2*n+n*nr+nr+2))
1423 DO 3970 p = 1, nr
1424 CALL scopy( nr, v(1,p), 1, u(1,p), 1 )
1425 CALL sscal( nr, sva(p), v(1,p), 1 )
1426 3970 CONTINUE
1427
1428* .. pick the right matrix equation and solve it
1429*
1430 IF ( nr .EQ. n ) THEN
1431* :)) .. best case, R1 is inverted. The solution of this matrix
1432* equation is Q2*V2 = the product of the Jacobi rotations
1433* used in SGESVJ, premultiplied with the orthogonal matrix
1434* from the second QR factorization.
1435 CALL strsm( 'L','U','N','N', nr,nr,one, a,lda, v,ldv )
1436 ELSE
1437* .. R1 is well conditioned, but non-square. Transpose(R2)
1438* is inverted to get the product of the Jacobi rotations
1439* used in SGESVJ. The Q-factor from the second QR
1440* factorization is then built in explicitly.
1441 CALL strsm('L','U','T','N',nr,nr,one,work(2*n+1),
1442 $ n,v,ldv)
1443 IF ( nr .LT. n ) THEN
1444 CALL slaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv)
1445 CALL slaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv)
1446 CALL slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv)
1447 END IF
1448 CALL sormqr('L','N',n,n,nr,work(2*n+1),n,work(n+1),
1449 $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr)
1450 END IF
1451*
1452 ELSE IF ( condr2 .LT. cond_ok ) THEN
1453*
1454* :) .. the input matrix A is very likely a relative of
1455* the Kahan matrix :)
1456* The matrix R2 is inverted. The solution of the matrix equation
1457* is Q3^T*V3 = the product of the Jacobi rotations (appplied to
1458* the lower triangular L3 from the LQ factorization of
1459* R2=L3*Q3), pre-multiplied with the transposed Q3.
1460 CALL sgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,
1461 $ ldu, work(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, info )
1462 scalem = work(2*n+n*nr+nr+1)
1463 numrank = nint(work(2*n+n*nr+nr+2))
1464 DO 3870 p = 1, nr
1465 CALL scopy( nr, v(1,p), 1, u(1,p), 1 )
1466 CALL sscal( nr, sva(p), u(1,p), 1 )
1467 3870 CONTINUE
1468 CALL strsm('L','U','N','N',nr,nr,one,work(2*n+1),n,u,ldu)
1469* .. apply the permutation from the second QR factorization
1470 DO 873 q = 1, nr
1471 DO 872 p = 1, nr
1472 work(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1473 872 CONTINUE
1474 DO 874 p = 1, nr
1475 u(p,q) = work(2*n+n*nr+nr+p)
1476 874 CONTINUE
1477 873 CONTINUE
1478 IF ( nr .LT. n ) THEN
1479 CALL slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1480 CALL slaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1481 CALL slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1482 END IF
1483 CALL sormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),
1484 $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1485 ELSE
1486* Last line of defense.
1487* #:( This is a rather pathological case: no scaled condition
1488* improvement after two pivoted QR factorizations. Other
1489* possibility is that the rank revealing QR factorization
1490* or the condition estimator has failed, or the COND_OK
1491* is set very close to ONE (which is unnecessary). Normally,
1492* this branch should never be executed, but in rare cases of
1493* failure of the RRQR or condition estimator, the last line of
1494* defense ensures that SGEJSV completes the task.
1495* Compute the full SVD of L3 using SGESVJ with explicit
1496* accumulation of Jacobi rotations.
1497 CALL sgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,
1498 $ ldu, work(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, info )
1499 scalem = work(2*n+n*nr+nr+1)
1500 numrank = nint(work(2*n+n*nr+nr+2))
1501 IF ( nr .LT. n ) THEN
1502 CALL slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1503 CALL slaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1504 CALL slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1505 END IF
1506 CALL sormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),
1507 $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1508*
1509 CALL sormlq( 'L', 'T', nr, nr, nr, work(2*n+1), n,
1510 $ work(2*n+n*nr+1), u, ldu, work(2*n+n*nr+nr+1),
1511 $ lwork-2*n-n*nr-nr, ierr )
1512 DO 773 q = 1, nr
1513 DO 772 p = 1, nr
1514 work(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1515 772 CONTINUE
1516 DO 774 p = 1, nr
1517 u(p,q) = work(2*n+n*nr+nr+p)
1518 774 CONTINUE
1519 773 CONTINUE
1520*
1521 END IF
1522*
1523* Permute the rows of V using the (column) permutation from the
1524* first QRF. Also, scale the columns to make them unit in
1525* Euclidean norm. This applies to all cases.
1526*
1527 temp1 = sqrt(float(n)) * epsln
1528 DO 1972 q = 1, n
1529 DO 972 p = 1, n
1530 work(2*n+n*nr+nr+iwork(p)) = v(p,q)
1531 972 CONTINUE
1532 DO 973 p = 1, n
1533 v(p,q) = work(2*n+n*nr+nr+p)
1534 973 CONTINUE
1535 xsc = one / snrm2( n, v(1,q), 1 )
1536 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1537 $ CALL sscal( n, xsc, v(1,q), 1 )
1538 1972 CONTINUE
1539* At this moment, V contains the right singular vectors of A.
1540* Next, assemble the left singular vector matrix U (M x N).
1541 IF ( nr .LT. m ) THEN
1542 CALL slaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu )
1543 IF ( nr .LT. n1 ) THEN
1544 CALL slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1545 CALL slaset('A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu)
1546 END IF
1547 END IF
1548*
1549* The Q matrix from the first QRF is built into the left singular
1550* matrix U. This applies to all cases.
1551*
1552 CALL sormqr( 'Left', 'No_Tr', m, n1, n, a, lda, work, u,
1553 $ ldu, work(n+1), lwork-n, ierr )
1554
1555* The columns of U are normalized. The cost is O(M*N) flops.
1556 temp1 = sqrt(float(m)) * epsln
1557 DO 1973 p = 1, nr
1558 xsc = one / snrm2( m, u(1,p), 1 )
1559 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1560 $ CALL sscal( m, xsc, u(1,p), 1 )
1561 1973 CONTINUE
1562*
1563* If the initial QRF is computed with row pivoting, the left
1564* singular vectors must be adjusted.
1565*
1566 IF ( rowpiv )
1567 $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1568*
1569 ELSE
1570*
1571* .. the initial matrix A has almost orthogonal columns and
1572* the second QRF is not needed
1573*
1574 CALL slacpy( 'Upper', n, n, a, lda, work(n+1), n )
1575 IF ( l2pert ) THEN
1576 xsc = sqrt(small)
1577 DO 5970 p = 2, n
1578 temp1 = xsc * work( n + (p-1)*n + p )
1579 DO 5971 q = 1, p - 1
1580 work(n+(q-1)*n+p)=-sign(temp1,work(n+(p-1)*n+q))
1581 5971 CONTINUE
1582 5970 CONTINUE
1583 ELSE
1584 CALL slaset( 'Lower',n-1,n-1,zero,zero,work(n+2),n )
1585 END IF
1586*
1587 CALL sgesvj( 'Upper', 'U', 'N', n, n, work(n+1), n, sva,
1588 $ n, u, ldu, work(n+n*n+1), lwork-n-n*n, info )
1589*
1590 scalem = work(n+n*n+1)
1591 numrank = nint(work(n+n*n+2))
1592 DO 6970 p = 1, n
1593 CALL scopy( n, work(n+(p-1)*n+1), 1, u(1,p), 1 )
1594 CALL sscal( n, sva(p), work(n+(p-1)*n+1), 1 )
1595 6970 CONTINUE
1596*
1597 CALL strsm( 'Left', 'Upper', 'NoTrans', 'No UD', n, n,
1598 $ one, a, lda, work(n+1), n )
1599 DO 6972 p = 1, n
1600 CALL scopy( n, work(n+p), n, v(iwork(p),1), ldv )
1601 6972 CONTINUE
1602 temp1 = sqrt(float(n))*epsln
1603 DO 6971 p = 1, n
1604 xsc = one / snrm2( n, v(1,p), 1 )
1605 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1606 $ CALL sscal( n, xsc, v(1,p), 1 )
1607 6971 CONTINUE
1608*
1609* Assemble the left singular vector matrix U (M x N).
1610*
1611 IF ( n .LT. m ) THEN
1612 CALL slaset( 'A', m-n, n, zero, zero, u(n+1,1), ldu )
1613 IF ( n .LT. n1 ) THEN
1614 CALL slaset( 'A',n, n1-n, zero, zero, u(1,n+1),ldu )
1615 CALL slaset( 'A',m-n,n1-n, zero, one,u(n+1,n+1),ldu )
1616 END IF
1617 END IF
1618 CALL sormqr( 'Left', 'No Tr', m, n1, n, a, lda, work, u,
1619 $ ldu, work(n+1), lwork-n, ierr )
1620 temp1 = sqrt(float(m))*epsln
1621 DO 6973 p = 1, n1
1622 xsc = one / snrm2( m, u(1,p), 1 )
1623 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1624 $ CALL sscal( m, xsc, u(1,p), 1 )
1625 6973 CONTINUE
1626*
1627 IF ( rowpiv )
1628 $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1629*
1630 END IF
1631*
1632* end of the >> almost orthogonal case << in the full SVD
1633*
1634 ELSE
1635*
1636* This branch deploys a preconditioned Jacobi SVD with explicitly
1637* accumulated rotations. It is included as optional, mainly for
1638* experimental purposes. It does perform well, and can also be used.
1639* In this implementation, this branch will be automatically activated
1640* if the condition number sigma_max(A) / sigma_min(A) is predicted
1641* to be greater than the overflow threshold. This is because the
1642* a posteriori computation of the singular vectors assumes robust
1643* implementation of BLAS and some LAPACK procedures, capable of working
1644* in presence of extreme values. Since that is not always the case, ...
1645*
1646 DO 7968 p = 1, nr
1647 CALL scopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1648 7968 CONTINUE
1649*
1650 IF ( l2pert ) THEN
1651 xsc = sqrt(small/epsln)
1652 DO 5969 q = 1, nr
1653 temp1 = xsc*abs( v(q,q) )
1654 DO 5968 p = 1, n
1655 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1656 $ .OR. ( p .LT. q ) )
1657 $ v(p,q) = sign( temp1, v(p,q) )
1658 IF ( p .LT. q ) v(p,q) = - v(p,q)
1659 5968 CONTINUE
1660 5969 CONTINUE
1661 ELSE
1662 CALL slaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv )
1663 END IF
1664
1665 CALL sgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),
1666 $ lwork-2*n, ierr )
1667 CALL slacpy( 'L', n, nr, v, ldv, work(2*n+1), n )
1668*
1669 DO 7969 p = 1, nr
1670 CALL scopy( nr-p+1, v(p,p), ldv, u(p,p), 1 )
1671 7969 CONTINUE
1672
1673 IF ( l2pert ) THEN
1674 xsc = sqrt(small/epsln)
1675 DO 9970 q = 2, nr
1676 DO 9971 p = 1, q - 1
1677 temp1 = xsc * min(abs(u(p,p)),abs(u(q,q)))
1678 u(p,q) = - sign( temp1, u(q,p) )
1679 9971 CONTINUE
1680 9970 CONTINUE
1681 ELSE
1682 CALL slaset('U', nr-1, nr-1, zero, zero, u(1,2), ldu )
1683 END IF
1684
1685 CALL sgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,
1686 $ n, v, ldv, work(2*n+n*nr+1), lwork-2*n-n*nr, info )
1687 scalem = work(2*n+n*nr+1)
1688 numrank = nint(work(2*n+n*nr+2))
1689
1690 IF ( nr .LT. n ) THEN
1691 CALL slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1692 CALL slaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1693 CALL slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1694 END IF
1695
1696 CALL sormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),
1697 $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1698*
1699* Permute the rows of V using the (column) permutation from the
1700* first QRF. Also, scale the columns to make them unit in
1701* Euclidean norm. This applies to all cases.
1702*
1703 temp1 = sqrt(float(n)) * epsln
1704 DO 7972 q = 1, n
1705 DO 8972 p = 1, n
1706 work(2*n+n*nr+nr+iwork(p)) = v(p,q)
1707 8972 CONTINUE
1708 DO 8973 p = 1, n
1709 v(p,q) = work(2*n+n*nr+nr+p)
1710 8973 CONTINUE
1711 xsc = one / snrm2( n, v(1,q), 1 )
1712 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1713 $ CALL sscal( n, xsc, v(1,q), 1 )
1714 7972 CONTINUE
1715*
1716* At this moment, V contains the right singular vectors of A.
1717* Next, assemble the left singular vector matrix U (M x N).
1718*
1719 IF ( nr .LT. m ) THEN
1720 CALL slaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu )
1721 IF ( nr .LT. n1 ) THEN
1722 CALL slaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1),ldu )
1723 CALL slaset( 'A',m-nr,n1-nr, zero, one,u(nr+1,nr+1),ldu )
1724 END IF
1725 END IF
1726*
1727 CALL sormqr( 'Left', 'No Tr', m, n1, n, a, lda, work, u,
1728 $ ldu, work(n+1), lwork-n, ierr )
1729*
1730 IF ( rowpiv )
1731 $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1732*
1733*
1734 END IF
1735 IF ( transp ) THEN
1736* .. swap U and V because the procedure worked on A^t
1737 DO 6974 p = 1, n
1738 CALL sswap( n, u(1,p), 1, v(1,p), 1 )
1739 6974 CONTINUE
1740 END IF
1741*
1742 END IF
1743* end of the full SVD
1744*
1745* Undo scaling, if necessary (and possible)
1746*
1747 IF ( uscal2 .LE. (big/sva(1))*uscal1 ) THEN
1748 CALL slascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr )
1749 uscal1 = one
1750 uscal2 = one
1751 END IF
1752*
1753 IF ( nr .LT. n ) THEN
1754 DO 3004 p = nr+1, n
1755 sva(p) = zero
1756 3004 CONTINUE
1757 END IF
1758*
1759 work(1) = uscal2 * scalem
1760 work(2) = uscal1
1761 IF ( errest ) work(3) = sconda
1762 IF ( lsvec .AND. rsvec ) THEN
1763 work(4) = condr1
1764 work(5) = condr2
1765 END IF
1766 IF ( l2tran ) THEN
1767 work(6) = entra
1768 work(7) = entrat
1769 END IF
1770*
1771 iwork(1) = nr
1772 iwork(2) = numrank
1773 iwork(3) = warning
1774*
1775 RETURN
1776* ..
1777* .. END OF SGEJSV
1778* ..
subroutine slassq(n, x, incx, scl, sumsq)
SLASSQ updates a sum of squares represented in scaled form.
Definition slassq.f90:137
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:110
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:103
subroutine slascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition slascl.f:143
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function isamax(n, sx, incx)
ISAMAX
Definition isamax.f:71
subroutine sgeqrf(m, n, a, lda, tau, work, lwork, info)
SGEQRF
Definition sgeqrf.f:146
subroutine sgelqf(m, n, a, lda, tau, work, lwork, info)
SGELQF
Definition sgelqf.f:143
subroutine sgeqp3(m, n, a, lda, jpvt, tau, work, lwork, info)
SGEQP3
Definition sgeqp3.f:151
subroutine sgesvj(joba, jobu, jobv, m, n, a, lda, sva, mv, v, ldv, work, lwork, info)
SGESVJ
Definition sgesvj.f:323
subroutine slaswp(n, a, lda, k1, k2, ipiv, incx)
SLASWP performs a series of row interchanges on a general rectangular matrix.
Definition slaswp.f:115
subroutine sormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
SORMQR
Definition sormqr.f:168
subroutine sorgqr(m, n, k, a, lda, tau, work, lwork, info)
SORGQR
Definition sorgqr.f:128
subroutine sormlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
SORMLQ
Definition sormlq.f:168
subroutine spocon(uplo, n, a, lda, anorm, rcond, work, iwork, info)
SPOCON
Definition spocon.f:121
subroutine sscal(n, sa, sx, incx)
SSCAL
Definition sscal.f:79
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
Definition scopy.f:82
subroutine sswap(n, sx, incx, sy, incy)
SSWAP
Definition sswap.f:82
real(wp) function snrm2(n, x, incx)
SNRM2
Definition snrm2.f90:89
subroutine strsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRSM
Definition strsm.f:181
real function slamch(cmach)
SLAMCH
Definition slamch.f:68
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21

◆ sgesdd()

subroutine sgesdd ( character jobz,
integer m,
integer n,
real, dimension( lda, * ) a,
integer lda,
real, dimension( * ) s,
real, dimension( ldu, * ) u,
integer ldu,
real, dimension( ldvt, * ) vt,
integer ldvt,
real, dimension( * ) work,
integer lwork,
integer, dimension( * ) iwork,
integer info )

SGESDD

Download SGESDD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> SGESDD computes the singular value decomposition (SVD) of a real
!> M-by-N matrix A, optionally computing the left and right singular
!> vectors.  If singular vectors are desired, it uses a
!> divide-and-conquer algorithm.
!>
!> The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
!> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns VT = V**T, not V.
!>
!> The divide and conquer algorithm makes very mild assumptions about
!> floating point arithmetic. It will work on machines with a guard
!> digit in add/subtract, or on those binary machines without guard
!> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
!> Cray-2. It could conceivably fail on hexadecimal or decimal machines
!> without guard digits, but we know of none.
!> 
Parameters
[in]JOBZ
!>          JOBZ is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'A':  all M columns of U and all N rows of V**T are
!>                  returned in the arrays U and VT;
!>          = 'S':  the first min(M,N) columns of U and the first
!>                  min(M,N) rows of V**T are returned in the arrays U
!>                  and VT;
!>          = 'O':  If M >= N, the first N columns of U are overwritten
!>                  on the array A and all rows of V**T are returned in
!>                  the array VT;
!>                  otherwise, all columns of U are returned in the
!>                  array U and the first M rows of V**T are overwritten
!>                  in the array A;
!>          = 'N':  no columns of U or rows of V**T are computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is REAL array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit,
!>          if JOBZ = 'O',  A is overwritten with the first N columns
!>                          of U (the left singular vectors, stored
!>                          columnwise) if M >= N;
!>                          A is overwritten with the first M rows
!>                          of V**T (the right singular vectors, stored
!>                          rowwise) otherwise.
!>          if JOBZ .ne. 'O', the contents of A are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is REAL array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is REAL array, dimension (LDU,UCOL)
!>          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
!>          UCOL = min(M,N) if JOBZ = 'S'.
!>          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
!>          orthogonal matrix U;
!>          if JOBZ = 'S', U contains the first min(M,N) columns of U
!>          (the left singular vectors, stored columnwise);
!>          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
!> 
[out]VT
!>          VT is REAL array, dimension (LDVT,N)
!>          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
!>          N-by-N orthogonal matrix V**T;
!>          if JOBZ = 'S', VT contains the first min(M,N) rows of
!>          V**T (the right singular vectors, stored rowwise);
!>          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1;
!>          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
!>          if JOBZ = 'S', LDVT >= min(M,N).
!> 
[out]WORK
!>          WORK is REAL array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK. LWORK >= 1.
!>          If LWORK = -1, a workspace query is assumed.  The optimal
!>          size for the WORK array is calculated and stored in WORK(1),
!>          and no other work except argument checking is performed.
!>
!>          Let mx = max(M,N) and mn = min(M,N).
!>          If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).
!>          If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).
!>          If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.
!>          If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.
!>          These are not tight minimums in all cases; see comments inside code.
!>          For good performance, LWORK should generally be larger;
!>          a query is recommended.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (8*min(M,N))
!> 
[out]INFO
!>          INFO is INTEGER
!>          <  0:  if INFO = -i, the i-th argument had an illegal value.
!>          = -4:  if A had a NAN entry.
!>          >  0:  SBDSDC did not converge, updating process failed.
!>          =  0:  successful exit.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA

Definition at line 217 of file sgesdd.f.

219 implicit none
220*
221* -- LAPACK driver routine --
222* -- LAPACK is a software package provided by Univ. of Tennessee, --
223* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
224*
225* .. Scalar Arguments ..
226 CHARACTER JOBZ
227 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
228* ..
229* .. Array Arguments ..
230 INTEGER IWORK( * )
231 REAL A( LDA, * ), S( * ), U( LDU, * ),
232 $ VT( LDVT, * ), WORK( * )
233* ..
234*
235* =====================================================================
236*
237* .. Parameters ..
238 REAL ZERO, ONE
239 parameter( zero = 0.0e0, one = 1.0e0 )
240* ..
241* .. Local Scalars ..
242 LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
243 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
244 $ IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
245 $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
246 $ MNTHR, NWORK, WRKBL
247 INTEGER LWORK_SGEBRD_MN, LWORK_SGEBRD_MM,
248 $ LWORK_SGEBRD_NN, LWORK_SGELQF_MN,
249 $ LWORK_SGEQRF_MN,
250 $ LWORK_SORGBR_P_MM, LWORK_SORGBR_Q_NN,
251 $ LWORK_SORGLQ_MN, LWORK_SORGLQ_NN,
252 $ LWORK_SORGQR_MM, LWORK_SORGQR_MN,
253 $ LWORK_SORMBR_PRT_MM, LWORK_SORMBR_QLN_MM,
254 $ LWORK_SORMBR_PRT_MN, LWORK_SORMBR_QLN_MN,
255 $ LWORK_SORMBR_PRT_NN, LWORK_SORMBR_QLN_NN
256 REAL ANRM, BIGNUM, EPS, SMLNUM
257* ..
258* .. Local Arrays ..
259 INTEGER IDUM( 1 )
260 REAL DUM( 1 )
261* ..
262* .. External Subroutines ..
263 EXTERNAL sbdsdc, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
265 $ xerbla
266* ..
267* .. External Functions ..
268 LOGICAL LSAME, SISNAN
269 REAL SLAMCH, SLANGE, SROUNDUP_LWORK
270 EXTERNAL slamch, slange, lsame, sisnan,
272* ..
273* .. Intrinsic Functions ..
274 INTRINSIC int, max, min, sqrt
275* ..
276* .. Executable Statements ..
277*
278* Test the input arguments
279*
280 info = 0
281 minmn = min( m, n )
282 wntqa = lsame( jobz, 'A' )
283 wntqs = lsame( jobz, 'S' )
284 wntqas = wntqa .OR. wntqs
285 wntqo = lsame( jobz, 'O' )
286 wntqn = lsame( jobz, 'N' )
287 lquery = ( lwork.EQ.-1 )
288*
289 IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
290 info = -1
291 ELSE IF( m.LT.0 ) THEN
292 info = -2
293 ELSE IF( n.LT.0 ) THEN
294 info = -3
295 ELSE IF( lda.LT.max( 1, m ) ) THEN
296 info = -5
297 ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
298 $ ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
299 info = -8
300 ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
301 $ ( wntqs .AND. ldvt.LT.minmn ) .OR.
302 $ ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
303 info = -10
304 END IF
305*
306* Compute workspace
307* Note: Comments in the code beginning "Workspace:" describe the
308* minimal amount of workspace allocated at that point in the code,
309* as well as the preferred amount for good performance.
310* NB refers to the optimal block size for the immediately
311* following subroutine, as returned by ILAENV.
312*
313 IF( info.EQ.0 ) THEN
314 minwrk = 1
315 maxwrk = 1
316 bdspac = 0
317 mnthr = int( minmn*11.0e0 / 6.0e0 )
318 IF( m.GE.n .AND. minmn.GT.0 ) THEN
319*
320* Compute space needed for SBDSDC
321*
322 IF( wntqn ) THEN
323* sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
324* keep 7*N for backwards compatibility.
325 bdspac = 7*n
326 ELSE
327 bdspac = 3*n*n + 4*n
328 END IF
329*
330* Compute space preferred for each routine
331 CALL sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
332 $ dum(1), dum(1), -1, ierr )
333 lwork_sgebrd_mn = int( dum(1) )
334*
335 CALL sgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),
336 $ dum(1), dum(1), -1, ierr )
337 lwork_sgebrd_nn = int( dum(1) )
338*
339 CALL sgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr )
340 lwork_sgeqrf_mn = int( dum(1) )
341*
342 CALL sorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,
343 $ ierr )
344 lwork_sorgbr_q_nn = int( dum(1) )
345*
346 CALL sorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr )
347 lwork_sorgqr_mm = int( dum(1) )
348*
349 CALL sorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr )
350 lwork_sorgqr_mn = int( dum(1) )
351*
352 CALL sormbr( 'P', 'R', 'T', n, n, n, dum(1), n,
353 $ dum(1), dum(1), n, dum(1), -1, ierr )
354 lwork_sormbr_prt_nn = int( dum(1) )
355*
356 CALL sormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,
357 $ dum(1), dum(1), n, dum(1), -1, ierr )
358 lwork_sormbr_qln_nn = int( dum(1) )
359*
360 CALL sormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,
361 $ dum(1), dum(1), m, dum(1), -1, ierr )
362 lwork_sormbr_qln_mn = int( dum(1) )
363*
364 CALL sormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,
365 $ dum(1), dum(1), m, dum(1), -1, ierr )
366 lwork_sormbr_qln_mm = int( dum(1) )
367*
368 IF( m.GE.mnthr ) THEN
369 IF( wntqn ) THEN
370*
371* Path 1 (M >> N, JOBZ='N')
372*
373 wrkbl = n + lwork_sgeqrf_mn
374 wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
375 maxwrk = max( wrkbl, bdspac + n )
376 minwrk = bdspac + n
377 ELSE IF( wntqo ) THEN
378*
379* Path 2 (M >> N, JOBZ='O')
380*
381 wrkbl = n + lwork_sgeqrf_mn
382 wrkbl = max( wrkbl, n + lwork_sorgqr_mn )
383 wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
384 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
385 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
386 wrkbl = max( wrkbl, 3*n + bdspac )
387 maxwrk = wrkbl + 2*n*n
388 minwrk = bdspac + 2*n*n + 3*n
389 ELSE IF( wntqs ) THEN
390*
391* Path 3 (M >> N, JOBZ='S')
392*
393 wrkbl = n + lwork_sgeqrf_mn
394 wrkbl = max( wrkbl, n + lwork_sorgqr_mn )
395 wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
396 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
397 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
398 wrkbl = max( wrkbl, 3*n + bdspac )
399 maxwrk = wrkbl + n*n
400 minwrk = bdspac + n*n + 3*n
401 ELSE IF( wntqa ) THEN
402*
403* Path 4 (M >> N, JOBZ='A')
404*
405 wrkbl = n + lwork_sgeqrf_mn
406 wrkbl = max( wrkbl, n + lwork_sorgqr_mm )
407 wrkbl = max( wrkbl, 3*n + lwork_sgebrd_nn )
408 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_nn )
409 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
410 wrkbl = max( wrkbl, 3*n + bdspac )
411 maxwrk = wrkbl + n*n
412 minwrk = n*n + max( 3*n + bdspac, n + m )
413 END IF
414 ELSE
415*
416* Path 5 (M >= N, but not much larger)
417*
418 wrkbl = 3*n + lwork_sgebrd_mn
419 IF( wntqn ) THEN
420* Path 5n (M >= N, jobz='N')
421 maxwrk = max( wrkbl, 3*n + bdspac )
422 minwrk = 3*n + max( m, bdspac )
423 ELSE IF( wntqo ) THEN
424* Path 5o (M >= N, jobz='O')
425 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
426 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn )
427 wrkbl = max( wrkbl, 3*n + bdspac )
428 maxwrk = wrkbl + m*n
429 minwrk = 3*n + max( m, n*n + bdspac )
430 ELSE IF( wntqs ) THEN
431* Path 5s (M >= N, jobz='S')
432 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mn )
433 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
434 maxwrk = max( wrkbl, 3*n + bdspac )
435 minwrk = 3*n + max( m, bdspac )
436 ELSE IF( wntqa ) THEN
437* Path 5a (M >= N, jobz='A')
438 wrkbl = max( wrkbl, 3*n + lwork_sormbr_qln_mm )
439 wrkbl = max( wrkbl, 3*n + lwork_sormbr_prt_nn )
440 maxwrk = max( wrkbl, 3*n + bdspac )
441 minwrk = 3*n + max( m, bdspac )
442 END IF
443 END IF
444 ELSE IF( minmn.GT.0 ) THEN
445*
446* Compute space needed for SBDSDC
447*
448 IF( wntqn ) THEN
449* sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
450* keep 7*N for backwards compatibility.
451 bdspac = 7*m
452 ELSE
453 bdspac = 3*m*m + 4*m
454 END IF
455*
456* Compute space preferred for each routine
457 CALL sgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
458 $ dum(1), dum(1), -1, ierr )
459 lwork_sgebrd_mn = int( dum(1) )
460*
461 CALL sgebrd( m, m, a, m, s, dum(1), dum(1),
462 $ dum(1), dum(1), -1, ierr )
463 lwork_sgebrd_mm = int( dum(1) )
464*
465 CALL sgelqf( m, n, a, m, dum(1), dum(1), -1, ierr )
466 lwork_sgelqf_mn = int( dum(1) )
467*
468 CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
469 lwork_sorglq_nn = int( dum(1) )
470*
471 CALL sorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr )
472 lwork_sorglq_mn = int( dum(1) )
473*
474 CALL sorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr )
475 lwork_sorgbr_p_mm = int( dum(1) )
476*
477 CALL sormbr( 'P', 'R', 'T', m, m, m, dum(1), m,
478 $ dum(1), dum(1), m, dum(1), -1, ierr )
479 lwork_sormbr_prt_mm = int( dum(1) )
480*
481 CALL sormbr( 'P', 'R', 'T', m, n, m, dum(1), m,
482 $ dum(1), dum(1), m, dum(1), -1, ierr )
483 lwork_sormbr_prt_mn = int( dum(1) )
484*
485 CALL sormbr( 'P', 'R', 'T', n, n, m, dum(1), n,
486 $ dum(1), dum(1), n, dum(1), -1, ierr )
487 lwork_sormbr_prt_nn = int( dum(1) )
488*
489 CALL sormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,
490 $ dum(1), dum(1), m, dum(1), -1, ierr )
491 lwork_sormbr_qln_mm = int( dum(1) )
492*
493 IF( n.GE.mnthr ) THEN
494 IF( wntqn ) THEN
495*
496* Path 1t (N >> M, JOBZ='N')
497*
498 wrkbl = m + lwork_sgelqf_mn
499 wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
500 maxwrk = max( wrkbl, bdspac + m )
501 minwrk = bdspac + m
502 ELSE IF( wntqo ) THEN
503*
504* Path 2t (N >> M, JOBZ='O')
505*
506 wrkbl = m + lwork_sgelqf_mn
507 wrkbl = max( wrkbl, m + lwork_sorglq_mn )
508 wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
509 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
510 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
511 wrkbl = max( wrkbl, 3*m + bdspac )
512 maxwrk = wrkbl + 2*m*m
513 minwrk = bdspac + 2*m*m + 3*m
514 ELSE IF( wntqs ) THEN
515*
516* Path 3t (N >> M, JOBZ='S')
517*
518 wrkbl = m + lwork_sgelqf_mn
519 wrkbl = max( wrkbl, m + lwork_sorglq_mn )
520 wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
521 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
522 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
523 wrkbl = max( wrkbl, 3*m + bdspac )
524 maxwrk = wrkbl + m*m
525 minwrk = bdspac + m*m + 3*m
526 ELSE IF( wntqa ) THEN
527*
528* Path 4t (N >> M, JOBZ='A')
529*
530 wrkbl = m + lwork_sgelqf_mn
531 wrkbl = max( wrkbl, m + lwork_sorglq_nn )
532 wrkbl = max( wrkbl, 3*m + lwork_sgebrd_mm )
533 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
534 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mm )
535 wrkbl = max( wrkbl, 3*m + bdspac )
536 maxwrk = wrkbl + m*m
537 minwrk = m*m + max( 3*m + bdspac, m + n )
538 END IF
539 ELSE
540*
541* Path 5t (N > M, but not much larger)
542*
543 wrkbl = 3*m + lwork_sgebrd_mn
544 IF( wntqn ) THEN
545* Path 5tn (N > M, jobz='N')
546 maxwrk = max( wrkbl, 3*m + bdspac )
547 minwrk = 3*m + max( n, bdspac )
548 ELSE IF( wntqo ) THEN
549* Path 5to (N > M, jobz='O')
550 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
551 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn )
552 wrkbl = max( wrkbl, 3*m + bdspac )
553 maxwrk = wrkbl + m*n
554 minwrk = 3*m + max( n, m*m + bdspac )
555 ELSE IF( wntqs ) THEN
556* Path 5ts (N > M, jobz='S')
557 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
558 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_mn )
559 maxwrk = max( wrkbl, 3*m + bdspac )
560 minwrk = 3*m + max( n, bdspac )
561 ELSE IF( wntqa ) THEN
562* Path 5ta (N > M, jobz='A')
563 wrkbl = max( wrkbl, 3*m + lwork_sormbr_qln_mm )
564 wrkbl = max( wrkbl, 3*m + lwork_sormbr_prt_nn )
565 maxwrk = max( wrkbl, 3*m + bdspac )
566 minwrk = 3*m + max( n, bdspac )
567 END IF
568 END IF
569 END IF
570
571 maxwrk = max( maxwrk, minwrk )
572 work( 1 ) = sroundup_lwork( maxwrk )
573*
574 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
575 info = -12
576 END IF
577 END IF
578*
579 IF( info.NE.0 ) THEN
580 CALL xerbla( 'SGESDD', -info )
581 RETURN
582 ELSE IF( lquery ) THEN
583 RETURN
584 END IF
585*
586* Quick return if possible
587*
588 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
589 RETURN
590 END IF
591*
592* Get machine constants
593*
594 eps = slamch( 'P' )
595 smlnum = sqrt( slamch( 'S' ) ) / eps
596 bignum = one / smlnum
597*
598* Scale A if max element outside range [SMLNUM,BIGNUM]
599*
600 anrm = slange( 'M', m, n, a, lda, dum )
601 IF( sisnan( anrm ) ) THEN
602 info = -4
603 RETURN
604 END IF
605 iscl = 0
606 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
607 iscl = 1
608 CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
609 ELSE IF( anrm.GT.bignum ) THEN
610 iscl = 1
611 CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
612 END IF
613*
614 IF( m.GE.n ) THEN
615*
616* A has at least as many rows as columns. If A has sufficiently
617* more rows than columns, first reduce using the QR
618* decomposition (if sufficient workspace available)
619*
620 IF( m.GE.mnthr ) THEN
621*
622 IF( wntqn ) THEN
623*
624* Path 1 (M >> N, JOBZ='N')
625* No singular vectors to be computed
626*
627 itau = 1
628 nwork = itau + n
629*
630* Compute A=Q*R
631* Workspace: need N [tau] + N [work]
632* Workspace: prefer N [tau] + N*NB [work]
633*
634 CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
635 $ lwork - nwork + 1, ierr )
636*
637* Zero out below R
638*
639 CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
640 ie = 1
641 itauq = ie + n
642 itaup = itauq + n
643 nwork = itaup + n
644*
645* Bidiagonalize R in A
646* Workspace: need 3*N [e, tauq, taup] + N [work]
647* Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
648*
649 CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
650 $ work( itaup ), work( nwork ), lwork-nwork+1,
651 $ ierr )
652 nwork = ie + n
653*
654* Perform bidiagonal SVD, computing singular values only
655* Workspace: need N [e] + BDSPAC
656*
657 CALL sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
658 $ dum, idum, work( nwork ), iwork, info )
659*
660 ELSE IF( wntqo ) THEN
661*
662* Path 2 (M >> N, JOBZ = 'O')
663* N left singular vectors to be overwritten on A and
664* N right singular vectors to be computed in VT
665*
666 ir = 1
667*
668* WORK(IR) is LDWRKR by N
669*
670 IF( lwork .GE. lda*n + n*n + 3*n + bdspac ) THEN
671 ldwrkr = lda
672 ELSE
673 ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n
674 END IF
675 itau = ir + ldwrkr*n
676 nwork = itau + n
677*
678* Compute A=Q*R
679* Workspace: need N*N [R] + N [tau] + N [work]
680* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
681*
682 CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
683 $ lwork - nwork + 1, ierr )
684*
685* Copy R to WORK(IR), zeroing out below it
686*
687 CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
688 CALL slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
689 $ ldwrkr )
690*
691* Generate Q in A
692* Workspace: need N*N [R] + N [tau] + N [work]
693* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
694*
695 CALL sorgqr( m, n, n, a, lda, work( itau ),
696 $ work( nwork ), lwork - nwork + 1, ierr )
697 ie = itau
698 itauq = ie + n
699 itaup = itauq + n
700 nwork = itaup + n
701*
702* Bidiagonalize R in WORK(IR)
703* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
704* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
705*
706 CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
707 $ work( itauq ), work( itaup ), work( nwork ),
708 $ lwork - nwork + 1, ierr )
709*
710* WORK(IU) is N by N
711*
712 iu = nwork
713 nwork = iu + n*n
714*
715* Perform bidiagonal SVD, computing left singular vectors
716* of bidiagonal matrix in WORK(IU) and computing right
717* singular vectors of bidiagonal matrix in VT
718* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
719*
720 CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
721 $ vt, ldvt, dum, idum, work( nwork ), iwork,
722 $ info )
723*
724* Overwrite WORK(IU) by left singular vectors of R
725* and VT by right singular vectors of R
726* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work]
727* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
728*
729 CALL sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
730 $ work( itauq ), work( iu ), n, work( nwork ),
731 $ lwork - nwork + 1, ierr )
732 CALL sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
733 $ work( itaup ), vt, ldvt, work( nwork ),
734 $ lwork - nwork + 1, ierr )
735*
736* Multiply Q in A by left singular vectors of R in
737* WORK(IU), storing result in WORK(IR) and copying to A
738* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U]
739* Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
740*
741 DO 10 i = 1, m, ldwrkr
742 chunk = min( m - i + 1, ldwrkr )
743 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
744 $ lda, work( iu ), n, zero, work( ir ),
745 $ ldwrkr )
746 CALL slacpy( 'F', chunk, n, work( ir ), ldwrkr,
747 $ a( i, 1 ), lda )
748 10 CONTINUE
749*
750 ELSE IF( wntqs ) THEN
751*
752* Path 3 (M >> N, JOBZ='S')
753* N left singular vectors to be computed in U and
754* N right singular vectors to be computed in VT
755*
756 ir = 1
757*
758* WORK(IR) is N by N
759*
760 ldwrkr = n
761 itau = ir + ldwrkr*n
762 nwork = itau + n
763*
764* Compute A=Q*R
765* Workspace: need N*N [R] + N [tau] + N [work]
766* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
767*
768 CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
769 $ lwork - nwork + 1, ierr )
770*
771* Copy R to WORK(IR), zeroing out below it
772*
773 CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774 CALL slaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
775 $ ldwrkr )
776*
777* Generate Q in A
778* Workspace: need N*N [R] + N [tau] + N [work]
779* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
780*
781 CALL sorgqr( m, n, n, a, lda, work( itau ),
782 $ work( nwork ), lwork - nwork + 1, ierr )
783 ie = itau
784 itauq = ie + n
785 itaup = itauq + n
786 nwork = itaup + n
787*
788* Bidiagonalize R in WORK(IR)
789* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
790* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
791*
792 CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
793 $ work( itauq ), work( itaup ), work( nwork ),
794 $ lwork - nwork + 1, ierr )
795*
796* Perform bidiagonal SVD, computing left singular vectors
797* of bidiagoal matrix in U and computing right singular
798* vectors of bidiagonal matrix in VT
799* Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC
800*
801 CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
802 $ ldvt, dum, idum, work( nwork ), iwork,
803 $ info )
804*
805* Overwrite U by left singular vectors of R and VT
806* by right singular vectors of R
807* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
808* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
809*
810 CALL sormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
811 $ work( itauq ), u, ldu, work( nwork ),
812 $ lwork - nwork + 1, ierr )
813*
814 CALL sormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
815 $ work( itaup ), vt, ldvt, work( nwork ),
816 $ lwork - nwork + 1, ierr )
817*
818* Multiply Q in A by left singular vectors of R in
819* WORK(IR), storing result in U
820* Workspace: need N*N [R]
821*
822 CALL slacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
823 CALL sgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),
824 $ ldwrkr, zero, u, ldu )
825*
826 ELSE IF( wntqa ) THEN
827*
828* Path 4 (M >> N, JOBZ='A')
829* M left singular vectors to be computed in U and
830* N right singular vectors to be computed in VT
831*
832 iu = 1
833*
834* WORK(IU) is N by N
835*
836 ldwrku = n
837 itau = iu + ldwrku*n
838 nwork = itau + n
839*
840* Compute A=Q*R, copying result to U
841* Workspace: need N*N [U] + N [tau] + N [work]
842* Workspace: prefer N*N [U] + N [tau] + N*NB [work]
843*
844 CALL sgeqrf( m, n, a, lda, work( itau ), work( nwork ),
845 $ lwork - nwork + 1, ierr )
846 CALL slacpy( 'L', m, n, a, lda, u, ldu )
847*
848* Generate Q in U
849* Workspace: need N*N [U] + N [tau] + M [work]
850* Workspace: prefer N*N [U] + N [tau] + M*NB [work]
851 CALL sorgqr( m, m, n, u, ldu, work( itau ),
852 $ work( nwork ), lwork - nwork + 1, ierr )
853*
854* Produce R in A, zeroing out other entries
855*
856 CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
857 ie = itau
858 itauq = ie + n
859 itaup = itauq + n
860 nwork = itaup + n
861*
862* Bidiagonalize R in A
863* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
864* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
865*
866 CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
867 $ work( itaup ), work( nwork ), lwork-nwork+1,
868 $ ierr )
869*
870* Perform bidiagonal SVD, computing left singular vectors
871* of bidiagonal matrix in WORK(IU) and computing right
872* singular vectors of bidiagonal matrix in VT
873* Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC
874*
875 CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
876 $ vt, ldvt, dum, idum, work( nwork ), iwork,
877 $ info )
878*
879* Overwrite WORK(IU) by left singular vectors of R and VT
880* by right singular vectors of R
881* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
882* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
883*
884 CALL sormbr( 'Q', 'L', 'N', n, n, n, a, lda,
885 $ work( itauq ), work( iu ), ldwrku,
886 $ work( nwork ), lwork - nwork + 1, ierr )
887 CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
888 $ work( itaup ), vt, ldvt, work( nwork ),
889 $ lwork - nwork + 1, ierr )
890*
891* Multiply Q in U by left singular vectors of R in
892* WORK(IU), storing result in A
893* Workspace: need N*N [U]
894*
895 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),
896 $ ldwrku, zero, a, lda )
897*
898* Copy left singular vectors of A from A to U
899*
900 CALL slacpy( 'F', m, n, a, lda, u, ldu )
901*
902 END IF
903*
904 ELSE
905*
906* M .LT. MNTHR
907*
908* Path 5 (M >= N, but not much larger)
909* Reduce to bidiagonal form without QR decomposition
910*
911 ie = 1
912 itauq = ie + n
913 itaup = itauq + n
914 nwork = itaup + n
915*
916* Bidiagonalize A
917* Workspace: need 3*N [e, tauq, taup] + M [work]
918* Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
919*
920 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
921 $ work( itaup ), work( nwork ), lwork-nwork+1,
922 $ ierr )
923 IF( wntqn ) THEN
924*
925* Path 5n (M >= N, JOBZ='N')
926* Perform bidiagonal SVD, only computing singular values
927* Workspace: need 3*N [e, tauq, taup] + BDSPAC
928*
929 CALL sbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
930 $ dum, idum, work( nwork ), iwork, info )
931 ELSE IF( wntqo ) THEN
932* Path 5o (M >= N, JOBZ='O')
933 iu = nwork
934 IF( lwork .GE. m*n + 3*n + bdspac ) THEN
935*
936* WORK( IU ) is M by N
937*
938 ldwrku = m
939 nwork = iu + ldwrku*n
940 CALL slaset( 'F', m, n, zero, zero, work( iu ),
941 $ ldwrku )
942* IR is unused; silence compile warnings
943 ir = -1
944 ELSE
945*
946* WORK( IU ) is N by N
947*
948 ldwrku = n
949 nwork = iu + ldwrku*n
950*
951* WORK(IR) is LDWRKR by N
952*
953 ir = nwork
954 ldwrkr = ( lwork - n*n - 3*n ) / n
955 END IF
956 nwork = iu + ldwrku*n
957*
958* Perform bidiagonal SVD, computing left singular vectors
959* of bidiagonal matrix in WORK(IU) and computing right
960* singular vectors of bidiagonal matrix in VT
961* Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC
962*
963 CALL sbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),
964 $ ldwrku, vt, ldvt, dum, idum, work( nwork ),
965 $ iwork, info )
966*
967* Overwrite VT by right singular vectors of A
968* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
969* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
970*
971 CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
972 $ work( itaup ), vt, ldvt, work( nwork ),
973 $ lwork - nwork + 1, ierr )
974*
975 IF( lwork .GE. m*n + 3*n + bdspac ) THEN
976*
977* Path 5o-fast
978* Overwrite WORK(IU) by left singular vectors of A
979* Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work]
980* Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
981*
982 CALL sormbr( 'Q', 'L', 'N', m, n, n, a, lda,
983 $ work( itauq ), work( iu ), ldwrku,
984 $ work( nwork ), lwork - nwork + 1, ierr )
985*
986* Copy left singular vectors of A from WORK(IU) to A
987*
988 CALL slacpy( 'F', m, n, work( iu ), ldwrku, a, lda )
989 ELSE
990*
991* Path 5o-slow
992* Generate Q in A
993* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
994* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
995*
996 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
997 $ work( nwork ), lwork - nwork + 1, ierr )
998*
999* Multiply Q in A by left singular vectors of
1000* bidiagonal matrix in WORK(IU), storing result in
1001* WORK(IR) and copying to A
1002* Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R]
1003* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R]
1004*
1005 DO 20 i = 1, m, ldwrkr
1006 chunk = min( m - i + 1, ldwrkr )
1007 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
1008 $ lda, work( iu ), ldwrku, zero,
1009 $ work( ir ), ldwrkr )
1010 CALL slacpy( 'F', chunk, n, work( ir ), ldwrkr,
1011 $ a( i, 1 ), lda )
1012 20 CONTINUE
1013 END IF
1014*
1015 ELSE IF( wntqs ) THEN
1016*
1017* Path 5s (M >= N, JOBZ='S')
1018* Perform bidiagonal SVD, computing left singular vectors
1019* of bidiagonal matrix in U and computing right singular
1020* vectors of bidiagonal matrix in VT
1021* Workspace: need 3*N [e, tauq, taup] + BDSPAC
1022*
1023 CALL slaset( 'F', m, n, zero, zero, u, ldu )
1024 CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1025 $ ldvt, dum, idum, work( nwork ), iwork,
1026 $ info )
1027*
1028* Overwrite U by left singular vectors of A and VT
1029* by right singular vectors of A
1030* Workspace: need 3*N [e, tauq, taup] + N [work]
1031* Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
1032*
1033 CALL sormbr( 'Q', 'L', 'N', m, n, n, a, lda,
1034 $ work( itauq ), u, ldu, work( nwork ),
1035 $ lwork - nwork + 1, ierr )
1036 CALL sormbr( 'P', 'R', 'T', n, n, n, a, lda,
1037 $ work( itaup ), vt, ldvt, work( nwork ),
1038 $ lwork - nwork + 1, ierr )
1039 ELSE IF( wntqa ) THEN
1040*
1041* Path 5a (M >= N, JOBZ='A')
1042* Perform bidiagonal SVD, computing left singular vectors
1043* of bidiagonal matrix in U and computing right singular
1044* vectors of bidiagonal matrix in VT
1045* Workspace: need 3*N [e, tauq, taup] + BDSPAC
1046*
1047 CALL slaset( 'F', m, m, zero, zero, u, ldu )
1048 CALL sbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1049 $ ldvt, dum, idum, work( nwork ), iwork,
1050 $ info )
1051*
1052* Set the right corner of U to identity matrix
1053*
1054 IF( m.GT.n ) THEN
1055 CALL slaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),
1056 $ ldu )
1057 END IF
1058*
1059* Overwrite U by left singular vectors of A and VT
1060* by right singular vectors of A
1061* Workspace: need 3*N [e, tauq, taup] + M [work]
1062* Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
1063*
1064 CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1065 $ work( itauq ), u, ldu, work( nwork ),
1066 $ lwork - nwork + 1, ierr )
1067 CALL sormbr( 'P', 'R', 'T', n, n, m, a, lda,
1068 $ work( itaup ), vt, ldvt, work( nwork ),
1069 $ lwork - nwork + 1, ierr )
1070 END IF
1071*
1072 END IF
1073*
1074 ELSE
1075*
1076* A has more columns than rows. If A has sufficiently more
1077* columns than rows, first reduce using the LQ decomposition (if
1078* sufficient workspace available)
1079*
1080 IF( n.GE.mnthr ) THEN
1081*
1082 IF( wntqn ) THEN
1083*
1084* Path 1t (N >> M, JOBZ='N')
1085* No singular vectors to be computed
1086*
1087 itau = 1
1088 nwork = itau + m
1089*
1090* Compute A=L*Q
1091* Workspace: need M [tau] + M [work]
1092* Workspace: prefer M [tau] + M*NB [work]
1093*
1094 CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1095 $ lwork - nwork + 1, ierr )
1096*
1097* Zero out above L
1098*
1099 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1100 ie = 1
1101 itauq = ie + m
1102 itaup = itauq + m
1103 nwork = itaup + m
1104*
1105* Bidiagonalize L in A
1106* Workspace: need 3*M [e, tauq, taup] + M [work]
1107* Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
1108*
1109 CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1110 $ work( itaup ), work( nwork ), lwork-nwork+1,
1111 $ ierr )
1112 nwork = ie + m
1113*
1114* Perform bidiagonal SVD, computing singular values only
1115* Workspace: need M [e] + BDSPAC
1116*
1117 CALL sbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,
1118 $ dum, idum, work( nwork ), iwork, info )
1119*
1120 ELSE IF( wntqo ) THEN
1121*
1122* Path 2t (N >> M, JOBZ='O')
1123* M right singular vectors to be overwritten on A and
1124* M left singular vectors to be computed in U
1125*
1126 ivt = 1
1127*
1128* WORK(IVT) is M by M
1129* WORK(IL) is M by M; it is later resized to M by chunk for gemm
1130*
1131 il = ivt + m*m
1132 IF( lwork .GE. m*n + m*m + 3*m + bdspac ) THEN
1133 ldwrkl = m
1134 chunk = n
1135 ELSE
1136 ldwrkl = m
1137 chunk = ( lwork - m*m ) / m
1138 END IF
1139 itau = il + ldwrkl*m
1140 nwork = itau + m
1141*
1142* Compute A=L*Q
1143* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1144* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1145*
1146 CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1147 $ lwork - nwork + 1, ierr )
1148*
1149* Copy L to WORK(IL), zeroing about above it
1150*
1151 CALL slacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1152 CALL slaset( 'U', m - 1, m - 1, zero, zero,
1153 $ work( il + ldwrkl ), ldwrkl )
1154*
1155* Generate Q in A
1156* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1157* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1158*
1159 CALL sorglq( m, n, m, a, lda, work( itau ),
1160 $ work( nwork ), lwork - nwork + 1, ierr )
1161 ie = itau
1162 itauq = ie + m
1163 itaup = itauq + m
1164 nwork = itaup + m
1165*
1166* Bidiagonalize L in WORK(IL)
1167* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1168* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1169*
1170 CALL sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1171 $ work( itauq ), work( itaup ), work( nwork ),
1172 $ lwork - nwork + 1, ierr )
1173*
1174* Perform bidiagonal SVD, computing left singular vectors
1175* of bidiagonal matrix in U, and computing right singular
1176* vectors of bidiagonal matrix in WORK(IVT)
1177* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1178*
1179 CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1180 $ work( ivt ), m, dum, idum, work( nwork ),
1181 $ iwork, info )
1182*
1183* Overwrite U by left singular vectors of L and WORK(IVT)
1184* by right singular vectors of L
1185* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1186* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1187*
1188 CALL sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1189 $ work( itauq ), u, ldu, work( nwork ),
1190 $ lwork - nwork + 1, ierr )
1191 CALL sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1192 $ work( itaup ), work( ivt ), m,
1193 $ work( nwork ), lwork - nwork + 1, ierr )
1194*
1195* Multiply right singular vectors of L in WORK(IVT) by Q
1196* in A, storing result in WORK(IL) and copying to A
1197* Workspace: need M*M [VT] + M*M [L]
1198* Workspace: prefer M*M [VT] + M*N [L]
1199* At this point, L is resized as M by chunk.
1200*
1201 DO 30 i = 1, n, chunk
1202 blk = min( n - i + 1, chunk )
1203 CALL sgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,
1204 $ a( 1, i ), lda, zero, work( il ), ldwrkl )
1205 CALL slacpy( 'F', m, blk, work( il ), ldwrkl,
1206 $ a( 1, i ), lda )
1207 30 CONTINUE
1208*
1209 ELSE IF( wntqs ) THEN
1210*
1211* Path 3t (N >> M, JOBZ='S')
1212* M right singular vectors to be computed in VT and
1213* M left singular vectors to be computed in U
1214*
1215 il = 1
1216*
1217* WORK(IL) is M by M
1218*
1219 ldwrkl = m
1220 itau = il + ldwrkl*m
1221 nwork = itau + m
1222*
1223* Compute A=L*Q
1224* Workspace: need M*M [L] + M [tau] + M [work]
1225* Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1226*
1227 CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1228 $ lwork - nwork + 1, ierr )
1229*
1230* Copy L to WORK(IL), zeroing out above it
1231*
1232 CALL slacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1233 CALL slaset( 'U', m - 1, m - 1, zero, zero,
1234 $ work( il + ldwrkl ), ldwrkl )
1235*
1236* Generate Q in A
1237* Workspace: need M*M [L] + M [tau] + M [work]
1238* Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1239*
1240 CALL sorglq( m, n, m, a, lda, work( itau ),
1241 $ work( nwork ), lwork - nwork + 1, ierr )
1242 ie = itau
1243 itauq = ie + m
1244 itaup = itauq + m
1245 nwork = itaup + m
1246*
1247* Bidiagonalize L in WORK(IU).
1248* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1249* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1250*
1251 CALL sgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1252 $ work( itauq ), work( itaup ), work( nwork ),
1253 $ lwork - nwork + 1, ierr )
1254*
1255* Perform bidiagonal SVD, computing left singular vectors
1256* of bidiagonal matrix in U and computing right singular
1257* vectors of bidiagonal matrix in VT
1258* Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1259*
1260 CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,
1261 $ ldvt, dum, idum, work( nwork ), iwork,
1262 $ info )
1263*
1264* Overwrite U by left singular vectors of L and VT
1265* by right singular vectors of L
1266* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1267* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1268*
1269 CALL sormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1270 $ work( itauq ), u, ldu, work( nwork ),
1271 $ lwork - nwork + 1, ierr )
1272 CALL sormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1273 $ work( itaup ), vt, ldvt, work( nwork ),
1274 $ lwork - nwork + 1, ierr )
1275*
1276* Multiply right singular vectors of L in WORK(IL) by
1277* Q in A, storing result in VT
1278* Workspace: need M*M [L]
1279*
1280 CALL slacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
1281 CALL sgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,
1282 $ a, lda, zero, vt, ldvt )
1283*
1284 ELSE IF( wntqa ) THEN
1285*
1286* Path 4t (N >> M, JOBZ='A')
1287* N right singular vectors to be computed in VT and
1288* M left singular vectors to be computed in U
1289*
1290 ivt = 1
1291*
1292* WORK(IVT) is M by M
1293*
1294 ldwkvt = m
1295 itau = ivt + ldwkvt*m
1296 nwork = itau + m
1297*
1298* Compute A=L*Q, copying result to VT
1299* Workspace: need M*M [VT] + M [tau] + M [work]
1300* Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
1301*
1302 CALL sgelqf( m, n, a, lda, work( itau ), work( nwork ),
1303 $ lwork - nwork + 1, ierr )
1304 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
1305*
1306* Generate Q in VT
1307* Workspace: need M*M [VT] + M [tau] + N [work]
1308* Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
1309*
1310 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
1311 $ work( nwork ), lwork - nwork + 1, ierr )
1312*
1313* Produce L in A, zeroing out other entries
1314*
1315 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1316 ie = itau
1317 itauq = ie + m
1318 itaup = itauq + m
1319 nwork = itaup + m
1320*
1321* Bidiagonalize L in A
1322* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work]
1323* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
1324*
1325 CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1326 $ work( itaup ), work( nwork ), lwork-nwork+1,
1327 $ ierr )
1328*
1329* Perform bidiagonal SVD, computing left singular vectors
1330* of bidiagonal matrix in U and computing right singular
1331* vectors of bidiagonal matrix in WORK(IVT)
1332* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
1333*
1334 CALL sbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1335 $ work( ivt ), ldwkvt, dum, idum,
1336 $ work( nwork ), iwork, info )
1337*
1338* Overwrite U by left singular vectors of L and WORK(IVT)
1339* by right singular vectors of L
1340* Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work]
1341* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
1342*
1343 CALL sormbr( 'Q', 'L', 'N', m, m, m, a, lda,
1344 $ work( itauq ), u, ldu, work( nwork ),
1345 $ lwork - nwork + 1, ierr )
1346 CALL sormbr( 'P', 'R', 'T', m, m, m, a, lda,
1347 $ work( itaup ), work( ivt ), ldwkvt,
1348 $ work( nwork ), lwork - nwork + 1, ierr )
1349*
1350* Multiply right singular vectors of L in WORK(IVT) by
1351* Q in VT, storing result in A
1352* Workspace: need M*M [VT]
1353*
1354 CALL sgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,
1355 $ vt, ldvt, zero, a, lda )
1356*
1357* Copy right singular vectors of A from A to VT
1358*
1359 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
1360*
1361 END IF
1362*
1363 ELSE
1364*
1365* N .LT. MNTHR
1366*
1367* Path 5t (N > M, but not much larger)
1368* Reduce to bidiagonal form without LQ decomposition
1369*
1370 ie = 1
1371 itauq = ie + m
1372 itaup = itauq + m
1373 nwork = itaup + m
1374*
1375* Bidiagonalize A
1376* Workspace: need 3*M [e, tauq, taup] + N [work]
1377* Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
1378*
1379 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1380 $ work( itaup ), work( nwork ), lwork-nwork+1,
1381 $ ierr )
1382 IF( wntqn ) THEN
1383*
1384* Path 5tn (N > M, JOBZ='N')
1385* Perform bidiagonal SVD, only computing singular values
1386* Workspace: need 3*M [e, tauq, taup] + BDSPAC
1387*
1388 CALL sbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,
1389 $ dum, idum, work( nwork ), iwork, info )
1390 ELSE IF( wntqo ) THEN
1391* Path 5to (N > M, JOBZ='O')
1392 ldwkvt = m
1393 ivt = nwork
1394 IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1395*
1396* WORK( IVT ) is M by N
1397*
1398 CALL slaset( 'F', m, n, zero, zero, work( ivt ),
1399 $ ldwkvt )
1400 nwork = ivt + ldwkvt*n
1401* IL is unused; silence compile warnings
1402 il = -1
1403 ELSE
1404*
1405* WORK( IVT ) is M by M
1406*
1407 nwork = ivt + ldwkvt*m
1408 il = nwork
1409*
1410* WORK(IL) is M by CHUNK
1411*
1412 chunk = ( lwork - m*m - 3*m ) / m
1413 END IF
1414*
1415* Perform bidiagonal SVD, computing left singular vectors
1416* of bidiagonal matrix in U and computing right singular
1417* vectors of bidiagonal matrix in WORK(IVT)
1418* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC
1419*
1420 CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,
1421 $ work( ivt ), ldwkvt, dum, idum,
1422 $ work( nwork ), iwork, info )
1423*
1424* Overwrite U by left singular vectors of A
1425* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1426* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1427*
1428 CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1429 $ work( itauq ), u, ldu, work( nwork ),
1430 $ lwork - nwork + 1, ierr )
1431*
1432 IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1433*
1434* Path 5to-fast
1435* Overwrite WORK(IVT) by left singular vectors of A
1436* Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work]
1437* Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
1438*
1439 CALL sormbr( 'P', 'R', 'T', m, n, m, a, lda,
1440 $ work( itaup ), work( ivt ), ldwkvt,
1441 $ work( nwork ), lwork - nwork + 1, ierr )
1442*
1443* Copy right singular vectors of A from WORK(IVT) to A
1444*
1445 CALL slacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )
1446 ELSE
1447*
1448* Path 5to-slow
1449* Generate P**T in A
1450* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1451* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1452*
1453 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
1454 $ work( nwork ), lwork - nwork + 1, ierr )
1455*
1456* Multiply Q in A by right singular vectors of
1457* bidiagonal matrix in WORK(IVT), storing result in
1458* WORK(IL) and copying to A
1459* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
1460* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L]
1461*
1462 DO 40 i = 1, n, chunk
1463 blk = min( n - i + 1, chunk )
1464 CALL sgemm( 'N', 'N', m, blk, m, one, work( ivt ),
1465 $ ldwkvt, a( 1, i ), lda, zero,
1466 $ work( il ), m )
1467 CALL slacpy( 'F', m, blk, work( il ), m, a( 1, i ),
1468 $ lda )
1469 40 CONTINUE
1470 END IF
1471 ELSE IF( wntqs ) THEN
1472*
1473* Path 5ts (N > M, JOBZ='S')
1474* Perform bidiagonal SVD, computing left singular vectors
1475* of bidiagonal matrix in U and computing right singular
1476* vectors of bidiagonal matrix in VT
1477* Workspace: need 3*M [e, tauq, taup] + BDSPAC
1478*
1479 CALL slaset( 'F', m, n, zero, zero, vt, ldvt )
1480 CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1481 $ ldvt, dum, idum, work( nwork ), iwork,
1482 $ info )
1483*
1484* Overwrite U by left singular vectors of A and VT
1485* by right singular vectors of A
1486* Workspace: need 3*M [e, tauq, taup] + M [work]
1487* Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
1488*
1489 CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1490 $ work( itauq ), u, ldu, work( nwork ),
1491 $ lwork - nwork + 1, ierr )
1492 CALL sormbr( 'P', 'R', 'T', m, n, m, a, lda,
1493 $ work( itaup ), vt, ldvt, work( nwork ),
1494 $ lwork - nwork + 1, ierr )
1495 ELSE IF( wntqa ) THEN
1496*
1497* Path 5ta (N > M, JOBZ='A')
1498* Perform bidiagonal SVD, computing left singular vectors
1499* of bidiagonal matrix in U and computing right singular
1500* vectors of bidiagonal matrix in VT
1501* Workspace: need 3*M [e, tauq, taup] + BDSPAC
1502*
1503 CALL slaset( 'F', n, n, zero, zero, vt, ldvt )
1504 CALL sbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1505 $ ldvt, dum, idum, work( nwork ), iwork,
1506 $ info )
1507*
1508* Set the right corner of VT to identity matrix
1509*
1510 IF( n.GT.m ) THEN
1511 CALL slaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),
1512 $ ldvt )
1513 END IF
1514*
1515* Overwrite U by left singular vectors of A and VT
1516* by right singular vectors of A
1517* Workspace: need 3*M [e, tauq, taup] + N [work]
1518* Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
1519*
1520 CALL sormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1521 $ work( itauq ), u, ldu, work( nwork ),
1522 $ lwork - nwork + 1, ierr )
1523 CALL sormbr( 'P', 'R', 'T', n, n, m, a, lda,
1524 $ work( itaup ), vt, ldvt, work( nwork ),
1525 $ lwork - nwork + 1, ierr )
1526 END IF
1527*
1528 END IF
1529*
1530 END IF
1531*
1532* Undo scaling if necessary
1533*
1534 IF( iscl.EQ.1 ) THEN
1535 IF( anrm.GT.bignum )
1536 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
1537 $ ierr )
1538 IF( anrm.LT.smlnum )
1539 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
1540 $ ierr )
1541 END IF
1542*
1543* Return optimal workspace in WORK(1)
1544*
1545 work( 1 ) = sroundup_lwork( maxwrk )
1546*
1547 RETURN
1548*
1549* End of SGESDD
1550*
logical function sisnan(sin)
SISNAN tests input for NaN.
Definition sisnan.f:59
subroutine sbdsdc(uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq, work, iwork, info)
SBDSDC
Definition sbdsdc.f:205
subroutine sorgbr(vect, m, n, k, a, lda, tau, work, lwork, info)
SORGBR
Definition sorgbr.f:157
real function slange(norm, m, n, a, lda, work)
SLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition slange.f:114
subroutine sgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
SGEBRD
Definition sgebrd.f:205
subroutine sormbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
SORMBR
Definition sormbr.f:196
subroutine sorglq(m, n, k, a, lda, tau, work, lwork, info)
SORGLQ
Definition sorglq.f:127
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:187
real function sroundup_lwork(lwork)
SROUNDUP_LWORK

◆ sgesvd()

subroutine sgesvd ( character jobu,
character jobvt,
integer m,
integer n,
real, dimension( lda, * ) a,
integer lda,
real, dimension( * ) s,
real, dimension( ldu, * ) u,
integer ldu,
real, dimension( ldvt, * ) vt,
integer ldvt,
real, dimension( * ) work,
integer lwork,
integer info )

SGESVD computes the singular value decomposition (SVD) for GE matrices

Download SGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> SGESVD computes the singular value decomposition (SVD) of a real
!> M-by-N matrix A, optionally computing the left and/or right singular
!> vectors. The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
!> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns V**T, not V.
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'A':  all M columns of U are returned in array U:
!>          = 'S':  the first min(m,n) columns of U (the left singular
!>                  vectors) are returned in the array U;
!>          = 'O':  the first min(m,n) columns of U (the left singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no columns of U (no left singular vectors) are
!>                  computed.
!> 
[in]JOBVT
!>          JOBVT is CHARACTER*1
!>          Specifies options for computing all or part of the matrix
!>          V**T:
!>          = 'A':  all N rows of V**T are returned in the array VT;
!>          = 'S':  the first min(m,n) rows of V**T (the right singular
!>                  vectors) are returned in the array VT;
!>          = 'O':  the first min(m,n) rows of V**T (the right singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no rows of V**T (no right singular vectors) are
!>                  computed.
!>
!>          JOBVT and JOBU cannot both be 'O'.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is REAL array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit,
!>          if JOBU = 'O',  A is overwritten with the first min(m,n)
!>                          columns of U (the left singular vectors,
!>                          stored columnwise);
!>          if JOBVT = 'O', A is overwritten with the first min(m,n)
!>                          rows of V**T (the right singular vectors,
!>                          stored rowwise);
!>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
!>                          are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is REAL array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is REAL array, dimension (LDU,UCOL)
!>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
!>          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
!>          if JOBU = 'S', U contains the first min(m,n) columns of U
!>          (the left singular vectors, stored columnwise);
!>          if JOBU = 'N' or 'O', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBU = 'S' or 'A', LDU >= M.
!> 
[out]VT
!>          VT is REAL array, dimension (LDVT,N)
!>          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
!>          V**T;
!>          if JOBVT = 'S', VT contains the first min(m,n) rows of
!>          V**T (the right singular vectors, stored rowwise);
!>          if JOBVT = 'N' or 'O', VT is not referenced.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1; if
!>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
!> 
[out]WORK
!>          WORK is REAL array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
!>          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
!>          superdiagonal elements of an upper bidiagonal matrix B
!>          whose diagonal is in S (not necessarily sorted). B
!>          satisfies A = U * B * VT, so it has the same singular values
!>          as A, and singular vectors related by U and VT.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
!>             - PATH 1  (M much larger than N, JOBU='N')
!>             - PATH 1t (N much larger than M, JOBVT='N')
!>          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
!>          For good performance, LWORK should generally be larger.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if SBDSQR did not converge, INFO specifies how many
!>                superdiagonals of an intermediate bidiagonal form B
!>                did not converge to zero. See the description of WORK
!>                above for details.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 209 of file sgesvd.f.

211*
212* -- LAPACK driver routine --
213* -- LAPACK is a software package provided by Univ. of Tennessee, --
214* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215*
216* .. Scalar Arguments ..
217 CHARACTER JOBU, JOBVT
218 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
219* ..
220* .. Array Arguments ..
221 REAL A( LDA, * ), S( * ), U( LDU, * ),
222 $ VT( LDVT, * ), WORK( * )
223* ..
224*
225* =====================================================================
226*
227* .. Parameters ..
228 REAL ZERO, ONE
229 parameter( zero = 0.0e0, one = 1.0e0 )
230* ..
231* .. Local Scalars ..
232 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
234 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
236 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
237 $ NRVT, WRKBL
238 INTEGER LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M,
239 $ LWORK_SGEBRD, LWORK_SORGBR_P, LWORK_SORGBR_Q,
240 $ LWORK_SGELQF, LWORK_SORGLQ_N, LWORK_SORGLQ_M
241 REAL ANRM, BIGNUM, EPS, SMLNUM
242* ..
243* .. Local Arrays ..
244 REAL DUM( 1 )
245* ..
246* .. External Subroutines ..
247 EXTERNAL sbdsqr, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
249 $ xerbla
250* ..
251* .. External Functions ..
252 LOGICAL LSAME
253 INTEGER ILAENV
254 REAL SLAMCH, SLANGE
255 EXTERNAL lsame, ilaenv, slamch, slange
256* ..
257* .. Intrinsic Functions ..
258 INTRINSIC max, min, sqrt
259* ..
260* .. Executable Statements ..
261*
262* Test the input arguments
263*
264 info = 0
265 minmn = min( m, n )
266 wntua = lsame( jobu, 'A' )
267 wntus = lsame( jobu, 'S' )
268 wntuas = wntua .OR. wntus
269 wntuo = lsame( jobu, 'O' )
270 wntun = lsame( jobu, 'N' )
271 wntva = lsame( jobvt, 'A' )
272 wntvs = lsame( jobvt, 'S' )
273 wntvas = wntva .OR. wntvs
274 wntvo = lsame( jobvt, 'O' )
275 wntvn = lsame( jobvt, 'N' )
276 lquery = ( lwork.EQ.-1 )
277*
278 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
279 info = -1
280 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
281 $ ( wntvo .AND. wntuo ) ) THEN
282 info = -2
283 ELSE IF( m.LT.0 ) THEN
284 info = -3
285 ELSE IF( n.LT.0 ) THEN
286 info = -4
287 ELSE IF( lda.LT.max( 1, m ) ) THEN
288 info = -6
289 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
290 info = -9
291 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
292 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
293 info = -11
294 END IF
295*
296* Compute workspace
297* (Note: Comments in the code beginning "Workspace:" describe the
298* minimal amount of workspace needed at that point in the code,
299* as well as the preferred amount for good performance.
300* NB refers to the optimal block size for the immediately
301* following subroutine, as returned by ILAENV.)
302*
303 IF( info.EQ.0 ) THEN
304 minwrk = 1
305 maxwrk = 1
306 IF( m.GE.n .AND. minmn.GT.0 ) THEN
307*
308* Compute space needed for SBDSQR
309*
310 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
311 bdspac = 5*n
312* Compute space needed for SGEQRF
313 CALL sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
314 lwork_sgeqrf = int( dum(1) )
315* Compute space needed for SORGQR
316 CALL sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
317 lwork_sorgqr_n = int( dum(1) )
318 CALL sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
319 lwork_sorgqr_m = int( dum(1) )
320* Compute space needed for SGEBRD
321 CALL sgebrd( n, n, a, lda, s, dum(1), dum(1),
322 $ dum(1), dum(1), -1, ierr )
323 lwork_sgebrd = int( dum(1) )
324* Compute space needed for SORGBR P
325 CALL sorgbr( 'P', n, n, n, a, lda, dum(1),
326 $ dum(1), -1, ierr )
327 lwork_sorgbr_p = int( dum(1) )
328* Compute space needed for SORGBR Q
329 CALL sorgbr( 'Q', n, n, n, a, lda, dum(1),
330 $ dum(1), -1, ierr )
331 lwork_sorgbr_q = int( dum(1) )
332*
333 IF( m.GE.mnthr ) THEN
334 IF( wntun ) THEN
335*
336* Path 1 (M much larger than N, JOBU='N')
337*
338 maxwrk = n + lwork_sgeqrf
339 maxwrk = max( maxwrk, 3*n+lwork_sgebrd )
340 IF( wntvo .OR. wntvas )
341 $ maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
342 maxwrk = max( maxwrk, bdspac )
343 minwrk = max( 4*n, bdspac )
344 ELSE IF( wntuo .AND. wntvn ) THEN
345*
346* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347*
348 wrkbl = n + lwork_sgeqrf
349 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
350 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
351 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
352 wrkbl = max( wrkbl, bdspac )
353 maxwrk = max( n*n+wrkbl, n*n+m*n+n )
354 minwrk = max( 3*n+m, bdspac )
355 ELSE IF( wntuo .AND. wntvas ) THEN
356*
357* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358* 'A')
359*
360 wrkbl = n + lwork_sgeqrf
361 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
362 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
363 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
364 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
365 wrkbl = max( wrkbl, bdspac )
366 maxwrk = max( n*n+wrkbl, n*n+m*n+n )
367 minwrk = max( 3*n+m, bdspac )
368 ELSE IF( wntus .AND. wntvn ) THEN
369*
370* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371*
372 wrkbl = n + lwork_sgeqrf
373 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
374 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
375 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
376 wrkbl = max( wrkbl, bdspac )
377 maxwrk = n*n + wrkbl
378 minwrk = max( 3*n+m, bdspac )
379 ELSE IF( wntus .AND. wntvo ) THEN
380*
381* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382*
383 wrkbl = n + lwork_sgeqrf
384 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
385 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
386 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
387 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
388 wrkbl = max( wrkbl, bdspac )
389 maxwrk = 2*n*n + wrkbl
390 minwrk = max( 3*n+m, bdspac )
391 ELSE IF( wntus .AND. wntvas ) THEN
392*
393* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394* 'A')
395*
396 wrkbl = n + lwork_sgeqrf
397 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
398 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
399 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
400 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
401 wrkbl = max( wrkbl, bdspac )
402 maxwrk = n*n + wrkbl
403 minwrk = max( 3*n+m, bdspac )
404 ELSE IF( wntua .AND. wntvn ) THEN
405*
406* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407*
408 wrkbl = n + lwork_sgeqrf
409 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
410 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
411 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
412 wrkbl = max( wrkbl, bdspac )
413 maxwrk = n*n + wrkbl
414 minwrk = max( 3*n+m, bdspac )
415 ELSE IF( wntua .AND. wntvo ) THEN
416*
417* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418*
419 wrkbl = n + lwork_sgeqrf
420 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
421 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
422 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
423 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
424 wrkbl = max( wrkbl, bdspac )
425 maxwrk = 2*n*n + wrkbl
426 minwrk = max( 3*n+m, bdspac )
427 ELSE IF( wntua .AND. wntvas ) THEN
428*
429* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430* 'A')
431*
432 wrkbl = n + lwork_sgeqrf
433 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
434 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
435 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
436 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
437 wrkbl = max( wrkbl, bdspac )
438 maxwrk = n*n + wrkbl
439 minwrk = max( 3*n+m, bdspac )
440 END IF
441 ELSE
442*
443* Path 10 (M at least N, but not much larger)
444*
445 CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
446 $ dum(1), dum(1), -1, ierr )
447 lwork_sgebrd = int( dum(1) )
448 maxwrk = 3*n + lwork_sgebrd
449 IF( wntus .OR. wntuo ) THEN
450 CALL sorgbr( 'Q', m, n, n, a, lda, dum(1),
451 $ dum(1), -1, ierr )
452 lwork_sorgbr_q = int( dum(1) )
453 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
454 END IF
455 IF( wntua ) THEN
456 CALL sorgbr( 'Q', m, m, n, a, lda, dum(1),
457 $ dum(1), -1, ierr )
458 lwork_sorgbr_q = int( dum(1) )
459 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
460 END IF
461 IF( .NOT.wntvn ) THEN
462 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
463 END IF
464 maxwrk = max( maxwrk, bdspac )
465 minwrk = max( 3*n+m, bdspac )
466 END IF
467 ELSE IF( minmn.GT.0 ) THEN
468*
469* Compute space needed for SBDSQR
470*
471 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
472 bdspac = 5*m
473* Compute space needed for SGELQF
474 CALL sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475 lwork_sgelqf = int( dum(1) )
476* Compute space needed for SORGLQ
477 CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478 lwork_sorglq_n = int( dum(1) )
479 CALL sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480 lwork_sorglq_m = int( dum(1) )
481* Compute space needed for SGEBRD
482 CALL sgebrd( m, m, a, lda, s, dum(1), dum(1),
483 $ dum(1), dum(1), -1, ierr )
484 lwork_sgebrd = int( dum(1) )
485* Compute space needed for SORGBR P
486 CALL sorgbr( 'P', m, m, m, a, n, dum(1),
487 $ dum(1), -1, ierr )
488 lwork_sorgbr_p = int( dum(1) )
489* Compute space needed for SORGBR Q
490 CALL sorgbr( 'Q', m, m, m, a, n, dum(1),
491 $ dum(1), -1, ierr )
492 lwork_sorgbr_q = int( dum(1) )
493 IF( n.GE.mnthr ) THEN
494 IF( wntvn ) THEN
495*
496* Path 1t(N much larger than M, JOBVT='N')
497*
498 maxwrk = m + lwork_sgelqf
499 maxwrk = max( maxwrk, 3*m+lwork_sgebrd )
500 IF( wntuo .OR. wntuas )
501 $ maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
502 maxwrk = max( maxwrk, bdspac )
503 minwrk = max( 4*m, bdspac )
504 ELSE IF( wntvo .AND. wntun ) THEN
505*
506* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507*
508 wrkbl = m + lwork_sgelqf
509 wrkbl = max( wrkbl, m+lwork_sorglq_m )
510 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
511 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
512 wrkbl = max( wrkbl, bdspac )
513 maxwrk = max( m*m+wrkbl, m*m+m*n+m )
514 minwrk = max( 3*m+n, bdspac )
515 ELSE IF( wntvo .AND. wntuas ) THEN
516*
517* Path 3t(N much larger than M, JOBU='S' or 'A',
518* JOBVT='O')
519*
520 wrkbl = m + lwork_sgelqf
521 wrkbl = max( wrkbl, m+lwork_sorglq_m )
522 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
523 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
524 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
525 wrkbl = max( wrkbl, bdspac )
526 maxwrk = max( m*m+wrkbl, m*m+m*n+m )
527 minwrk = max( 3*m+n, bdspac )
528 ELSE IF( wntvs .AND. wntun ) THEN
529*
530* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531*
532 wrkbl = m + lwork_sgelqf
533 wrkbl = max( wrkbl, m+lwork_sorglq_m )
534 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
535 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
536 wrkbl = max( wrkbl, bdspac )
537 maxwrk = m*m + wrkbl
538 minwrk = max( 3*m+n, bdspac )
539 ELSE IF( wntvs .AND. wntuo ) THEN
540*
541* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542*
543 wrkbl = m + lwork_sgelqf
544 wrkbl = max( wrkbl, m+lwork_sorglq_m )
545 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
546 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
547 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
548 wrkbl = max( wrkbl, bdspac )
549 maxwrk = 2*m*m + wrkbl
550 minwrk = max( 3*m+n, bdspac )
551 maxwrk = max( maxwrk, minwrk )
552 ELSE IF( wntvs .AND. wntuas ) THEN
553*
554* Path 6t(N much larger than M, JOBU='S' or 'A',
555* JOBVT='S')
556*
557 wrkbl = m + lwork_sgelqf
558 wrkbl = max( wrkbl, m+lwork_sorglq_m )
559 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
560 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
561 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
562 wrkbl = max( wrkbl, bdspac )
563 maxwrk = m*m + wrkbl
564 minwrk = max( 3*m+n, bdspac )
565 ELSE IF( wntva .AND. wntun ) THEN
566*
567* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
568*
569 wrkbl = m + lwork_sgelqf
570 wrkbl = max( wrkbl, m+lwork_sorglq_n )
571 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
572 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
573 wrkbl = max( wrkbl, bdspac )
574 maxwrk = m*m + wrkbl
575 minwrk = max( 3*m+n, bdspac )
576 ELSE IF( wntva .AND. wntuo ) THEN
577*
578* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
579*
580 wrkbl = m + lwork_sgelqf
581 wrkbl = max( wrkbl, m+lwork_sorglq_n )
582 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
583 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
584 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
585 wrkbl = max( wrkbl, bdspac )
586 maxwrk = 2*m*m + wrkbl
587 minwrk = max( 3*m+n, bdspac )
588 ELSE IF( wntva .AND. wntuas ) THEN
589*
590* Path 9t(N much larger than M, JOBU='S' or 'A',
591* JOBVT='A')
592*
593 wrkbl = m + lwork_sgelqf
594 wrkbl = max( wrkbl, m+lwork_sorglq_n )
595 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
596 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
597 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
598 wrkbl = max( wrkbl, bdspac )
599 maxwrk = m*m + wrkbl
600 minwrk = max( 3*m+n, bdspac )
601 END IF
602 ELSE
603*
604* Path 10t(N greater than M, but not much larger)
605*
606 CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
607 $ dum(1), dum(1), -1, ierr )
608 lwork_sgebrd = int( dum(1) )
609 maxwrk = 3*m + lwork_sgebrd
610 IF( wntvs .OR. wntvo ) THEN
611* Compute space needed for SORGBR P
612 CALL sorgbr( 'P', m, n, m, a, n, dum(1),
613 $ dum(1), -1, ierr )
614 lwork_sorgbr_p = int( dum(1) )
615 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
616 END IF
617 IF( wntva ) THEN
618 CALL sorgbr( 'P', n, n, m, a, n, dum(1),
619 $ dum(1), -1, ierr )
620 lwork_sorgbr_p = int( dum(1) )
621 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
622 END IF
623 IF( .NOT.wntun ) THEN
624 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
625 END IF
626 maxwrk = max( maxwrk, bdspac )
627 minwrk = max( 3*m+n, bdspac )
628 END IF
629 END IF
630 maxwrk = max( maxwrk, minwrk )
631 work( 1 ) = maxwrk
632*
633 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
634 info = -13
635 END IF
636 END IF
637*
638 IF( info.NE.0 ) THEN
639 CALL xerbla( 'SGESVD', -info )
640 RETURN
641 ELSE IF( lquery ) THEN
642 RETURN
643 END IF
644*
645* Quick return if possible
646*
647 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
648 RETURN
649 END IF
650*
651* Get machine constants
652*
653 eps = slamch( 'P' )
654 smlnum = sqrt( slamch( 'S' ) ) / eps
655 bignum = one / smlnum
656*
657* Scale A if max element outside range [SMLNUM,BIGNUM]
658*
659 anrm = slange( 'M', m, n, a, lda, dum )
660 iscl = 0
661 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
662 iscl = 1
663 CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
664 ELSE IF( anrm.GT.bignum ) THEN
665 iscl = 1
666 CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
667 END IF
668*
669 IF( m.GE.n ) THEN
670*
671* A has at least as many rows as columns. If A has sufficiently
672* more rows than columns, first reduce using the QR
673* decomposition (if sufficient workspace available)
674*
675 IF( m.GE.mnthr ) THEN
676*
677 IF( wntun ) THEN
678*
679* Path 1 (M much larger than N, JOBU='N')
680* No left singular vectors to be computed
681*
682 itau = 1
683 iwork = itau + n
684*
685* Compute A=Q*R
686* (Workspace: need 2*N, prefer N+N*NB)
687*
688 CALL sgeqrf( m, n, a, lda, work( itau ), work( iwork ),
689 $ lwork-iwork+1, ierr )
690*
691* Zero out below R
692*
693 IF( n .GT. 1 ) THEN
694 CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
695 $ lda )
696 END IF
697 ie = 1
698 itauq = ie + n
699 itaup = itauq + n
700 iwork = itaup + n
701*
702* Bidiagonalize R in A
703* (Workspace: need 4*N, prefer 3*N+2*N*NB)
704*
705 CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
706 $ work( itaup ), work( iwork ), lwork-iwork+1,
707 $ ierr )
708 ncvt = 0
709 IF( wntvo .OR. wntvas ) THEN
710*
711* If right singular vectors desired, generate P'.
712* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
713*
714 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
715 $ work( iwork ), lwork-iwork+1, ierr )
716 ncvt = n
717 END IF
718 iwork = ie + n
719*
720* Perform bidiagonal QR iteration, computing right
721* singular vectors of A in A if desired
722* (Workspace: need BDSPAC)
723*
724 CALL sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
725 $ dum, 1, dum, 1, work( iwork ), info )
726*
727* If right singular vectors desired in VT, copy them there
728*
729 IF( wntvas )
730 $ CALL slacpy( 'F', n, n, a, lda, vt, ldvt )
731*
732 ELSE IF( wntuo .AND. wntvn ) THEN
733*
734* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
735* N left singular vectors to be overwritten on A and
736* no right singular vectors to be computed
737*
738 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
739*
740* Sufficient workspace for a fast algorithm
741*
742 ir = 1
743 IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
744*
745* WORK(IU) is LDA by N, WORK(IR) is LDA by N
746*
747 ldwrku = lda
748 ldwrkr = lda
749 ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
750*
751* WORK(IU) is LDA by N, WORK(IR) is N by N
752*
753 ldwrku = lda
754 ldwrkr = n
755 ELSE
756*
757* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
758*
759 ldwrku = ( lwork-n*n-n ) / n
760 ldwrkr = n
761 END IF
762 itau = ir + ldwrkr*n
763 iwork = itau + n
764*
765* Compute A=Q*R
766* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
767*
768 CALL sgeqrf( m, n, a, lda, work( itau ),
769 $ work( iwork ), lwork-iwork+1, ierr )
770*
771* Copy R to WORK(IR) and zero out below it
772*
773 CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774 CALL slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
775 $ ldwrkr )
776*
777* Generate Q in A
778* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
779*
780 CALL sorgqr( m, n, n, a, lda, work( itau ),
781 $ work( iwork ), lwork-iwork+1, ierr )
782 ie = itau
783 itauq = ie + n
784 itaup = itauq + n
785 iwork = itaup + n
786*
787* Bidiagonalize R in WORK(IR)
788* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
789*
790 CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
791 $ work( itauq ), work( itaup ),
792 $ work( iwork ), lwork-iwork+1, ierr )
793*
794* Generate left vectors bidiagonalizing R
795* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
796*
797 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
798 $ work( itauq ), work( iwork ),
799 $ lwork-iwork+1, ierr )
800 iwork = ie + n
801*
802* Perform bidiagonal QR iteration, computing left
803* singular vectors of R in WORK(IR)
804* (Workspace: need N*N+BDSPAC)
805*
806 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
807 $ work( ir ), ldwrkr, dum, 1,
808 $ work( iwork ), info )
809 iu = ie + n
810*
811* Multiply Q in A by left singular vectors of R in
812* WORK(IR), storing result in WORK(IU) and copying to A
813* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
814*
815 DO 10 i = 1, m, ldwrku
816 chunk = min( m-i+1, ldwrku )
817 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
818 $ lda, work( ir ), ldwrkr, zero,
819 $ work( iu ), ldwrku )
820 CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
821 $ a( i, 1 ), lda )
822 10 CONTINUE
823*
824 ELSE
825*
826* Insufficient workspace for a fast algorithm
827*
828 ie = 1
829 itauq = ie + n
830 itaup = itauq + n
831 iwork = itaup + n
832*
833* Bidiagonalize A
834* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
835*
836 CALL sgebrd( m, n, a, lda, s, work( ie ),
837 $ work( itauq ), work( itaup ),
838 $ work( iwork ), lwork-iwork+1, ierr )
839*
840* Generate left vectors bidiagonalizing A
841* (Workspace: need 4*N, prefer 3*N+N*NB)
842*
843 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
844 $ work( iwork ), lwork-iwork+1, ierr )
845 iwork = ie + n
846*
847* Perform bidiagonal QR iteration, computing left
848* singular vectors of A in A
849* (Workspace: need BDSPAC)
850*
851 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
852 $ a, lda, dum, 1, work( iwork ), info )
853*
854 END IF
855*
856 ELSE IF( wntuo .AND. wntvas ) THEN
857*
858* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859* N left singular vectors to be overwritten on A and
860* N right singular vectors to be computed in VT
861*
862 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
863*
864* Sufficient workspace for a fast algorithm
865*
866 ir = 1
867 IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
868*
869* WORK(IU) is LDA by N and WORK(IR) is LDA by N
870*
871 ldwrku = lda
872 ldwrkr = lda
873 ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
874*
875* WORK(IU) is LDA by N and WORK(IR) is N by N
876*
877 ldwrku = lda
878 ldwrkr = n
879 ELSE
880*
881* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882*
883 ldwrku = ( lwork-n*n-n ) / n
884 ldwrkr = n
885 END IF
886 itau = ir + ldwrkr*n
887 iwork = itau + n
888*
889* Compute A=Q*R
890* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
891*
892 CALL sgeqrf( m, n, a, lda, work( itau ),
893 $ work( iwork ), lwork-iwork+1, ierr )
894*
895* Copy R to VT, zeroing out below it
896*
897 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
898 IF( n.GT.1 )
899 $ CALL slaset( 'L', n-1, n-1, zero, zero,
900 $ vt( 2, 1 ), ldvt )
901*
902* Generate Q in A
903* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
904*
905 CALL sorgqr( m, n, n, a, lda, work( itau ),
906 $ work( iwork ), lwork-iwork+1, ierr )
907 ie = itau
908 itauq = ie + n
909 itaup = itauq + n
910 iwork = itaup + n
911*
912* Bidiagonalize R in VT, copying result to WORK(IR)
913* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
914*
915 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
916 $ work( itauq ), work( itaup ),
917 $ work( iwork ), lwork-iwork+1, ierr )
918 CALL slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
919*
920* Generate left vectors bidiagonalizing R in WORK(IR)
921* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
922*
923 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
924 $ work( itauq ), work( iwork ),
925 $ lwork-iwork+1, ierr )
926*
927* Generate right vectors bidiagonalizing R in VT
928* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
929*
930 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
931 $ work( iwork ), lwork-iwork+1, ierr )
932 iwork = ie + n
933*
934* Perform bidiagonal QR iteration, computing left
935* singular vectors of R in WORK(IR) and computing right
936* singular vectors of R in VT
937* (Workspace: need N*N+BDSPAC)
938*
939 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
940 $ work( ir ), ldwrkr, dum, 1,
941 $ work( iwork ), info )
942 iu = ie + n
943*
944* Multiply Q in A by left singular vectors of R in
945* WORK(IR), storing result in WORK(IU) and copying to A
946* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
947*
948 DO 20 i = 1, m, ldwrku
949 chunk = min( m-i+1, ldwrku )
950 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
951 $ lda, work( ir ), ldwrkr, zero,
952 $ work( iu ), ldwrku )
953 CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
954 $ a( i, 1 ), lda )
955 20 CONTINUE
956*
957 ELSE
958*
959* Insufficient workspace for a fast algorithm
960*
961 itau = 1
962 iwork = itau + n
963*
964* Compute A=Q*R
965* (Workspace: need 2*N, prefer N+N*NB)
966*
967 CALL sgeqrf( m, n, a, lda, work( itau ),
968 $ work( iwork ), lwork-iwork+1, ierr )
969*
970* Copy R to VT, zeroing out below it
971*
972 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
973 IF( n.GT.1 )
974 $ CALL slaset( 'L', n-1, n-1, zero, zero,
975 $ vt( 2, 1 ), ldvt )
976*
977* Generate Q in A
978* (Workspace: need 2*N, prefer N+N*NB)
979*
980 CALL sorgqr( m, n, n, a, lda, work( itau ),
981 $ work( iwork ), lwork-iwork+1, ierr )
982 ie = itau
983 itauq = ie + n
984 itaup = itauq + n
985 iwork = itaup + n
986*
987* Bidiagonalize R in VT
988* (Workspace: need 4*N, prefer 3*N+2*N*NB)
989*
990 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
991 $ work( itauq ), work( itaup ),
992 $ work( iwork ), lwork-iwork+1, ierr )
993*
994* Multiply Q in A by left vectors bidiagonalizing R
995* (Workspace: need 3*N+M, prefer 3*N+M*NB)
996*
997 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
998 $ work( itauq ), a, lda, work( iwork ),
999 $ lwork-iwork+1, ierr )
1000*
1001* Generate right vectors bidiagonalizing R in VT
1002* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1003*
1004 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1005 $ work( iwork ), lwork-iwork+1, ierr )
1006 iwork = ie + n
1007*
1008* Perform bidiagonal QR iteration, computing left
1009* singular vectors of A in A and computing right
1010* singular vectors of A in VT
1011* (Workspace: need BDSPAC)
1012*
1013 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1014 $ a, lda, dum, 1, work( iwork ), info )
1015*
1016 END IF
1017*
1018 ELSE IF( wntus ) THEN
1019*
1020 IF( wntvn ) THEN
1021*
1022* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1023* N left singular vectors to be computed in U and
1024* no right singular vectors to be computed
1025*
1026 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1027*
1028* Sufficient workspace for a fast algorithm
1029*
1030 ir = 1
1031 IF( lwork.GE.wrkbl+lda*n ) THEN
1032*
1033* WORK(IR) is LDA by N
1034*
1035 ldwrkr = lda
1036 ELSE
1037*
1038* WORK(IR) is N by N
1039*
1040 ldwrkr = n
1041 END IF
1042 itau = ir + ldwrkr*n
1043 iwork = itau + n
1044*
1045* Compute A=Q*R
1046* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1047*
1048 CALL sgeqrf( m, n, a, lda, work( itau ),
1049 $ work( iwork ), lwork-iwork+1, ierr )
1050*
1051* Copy R to WORK(IR), zeroing out below it
1052*
1053 CALL slacpy( 'U', n, n, a, lda, work( ir ),
1054 $ ldwrkr )
1055 CALL slaset( 'L', n-1, n-1, zero, zero,
1056 $ work( ir+1 ), ldwrkr )
1057*
1058* Generate Q in A
1059* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1060*
1061 CALL sorgqr( m, n, n, a, lda, work( itau ),
1062 $ work( iwork ), lwork-iwork+1, ierr )
1063 ie = itau
1064 itauq = ie + n
1065 itaup = itauq + n
1066 iwork = itaup + n
1067*
1068* Bidiagonalize R in WORK(IR)
1069* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1070*
1071 CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1072 $ work( ie ), work( itauq ),
1073 $ work( itaup ), work( iwork ),
1074 $ lwork-iwork+1, ierr )
1075*
1076* Generate left vectors bidiagonalizing R in WORK(IR)
1077* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1078*
1079 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1080 $ work( itauq ), work( iwork ),
1081 $ lwork-iwork+1, ierr )
1082 iwork = ie + n
1083*
1084* Perform bidiagonal QR iteration, computing left
1085* singular vectors of R in WORK(IR)
1086* (Workspace: need N*N+BDSPAC)
1087*
1088 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1089 $ 1, work( ir ), ldwrkr, dum, 1,
1090 $ work( iwork ), info )
1091*
1092* Multiply Q in A by left singular vectors of R in
1093* WORK(IR), storing result in U
1094* (Workspace: need N*N)
1095*
1096 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1097 $ work( ir ), ldwrkr, zero, u, ldu )
1098*
1099 ELSE
1100*
1101* Insufficient workspace for a fast algorithm
1102*
1103 itau = 1
1104 iwork = itau + n
1105*
1106* Compute A=Q*R, copying result to U
1107* (Workspace: need 2*N, prefer N+N*NB)
1108*
1109 CALL sgeqrf( m, n, a, lda, work( itau ),
1110 $ work( iwork ), lwork-iwork+1, ierr )
1111 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1112*
1113* Generate Q in U
1114* (Workspace: need 2*N, prefer N+N*NB)
1115*
1116 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1117 $ work( iwork ), lwork-iwork+1, ierr )
1118 ie = itau
1119 itauq = ie + n
1120 itaup = itauq + n
1121 iwork = itaup + n
1122*
1123* Zero out below R in A
1124*
1125 IF( n .GT. 1 ) THEN
1126 CALL slaset( 'L', n-1, n-1, zero, zero,
1127 $ a( 2, 1 ), lda )
1128 END IF
1129*
1130* Bidiagonalize R in A
1131* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1132*
1133 CALL sgebrd( n, n, a, lda, s, work( ie ),
1134 $ work( itauq ), work( itaup ),
1135 $ work( iwork ), lwork-iwork+1, ierr )
1136*
1137* Multiply Q in U by left vectors bidiagonalizing R
1138* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1139*
1140 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1141 $ work( itauq ), u, ldu, work( iwork ),
1142 $ lwork-iwork+1, ierr )
1143 iwork = ie + n
1144*
1145* Perform bidiagonal QR iteration, computing left
1146* singular vectors of A in U
1147* (Workspace: need BDSPAC)
1148*
1149 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1150 $ 1, u, ldu, dum, 1, work( iwork ),
1151 $ info )
1152*
1153 END IF
1154*
1155 ELSE IF( wntvo ) THEN
1156*
1157* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1158* N left singular vectors to be computed in U and
1159* N right singular vectors to be overwritten on A
1160*
1161 IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1162*
1163* Sufficient workspace for a fast algorithm
1164*
1165 iu = 1
1166 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1167*
1168* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1169*
1170 ldwrku = lda
1171 ir = iu + ldwrku*n
1172 ldwrkr = lda
1173 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1174*
1175* WORK(IU) is LDA by N and WORK(IR) is N by N
1176*
1177 ldwrku = lda
1178 ir = iu + ldwrku*n
1179 ldwrkr = n
1180 ELSE
1181*
1182* WORK(IU) is N by N and WORK(IR) is N by N
1183*
1184 ldwrku = n
1185 ir = iu + ldwrku*n
1186 ldwrkr = n
1187 END IF
1188 itau = ir + ldwrkr*n
1189 iwork = itau + n
1190*
1191* Compute A=Q*R
1192* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1193*
1194 CALL sgeqrf( m, n, a, lda, work( itau ),
1195 $ work( iwork ), lwork-iwork+1, ierr )
1196*
1197* Copy R to WORK(IU), zeroing out below it
1198*
1199 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1200 $ ldwrku )
1201 CALL slaset( 'L', n-1, n-1, zero, zero,
1202 $ work( iu+1 ), ldwrku )
1203*
1204* Generate Q in A
1205* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1206*
1207 CALL sorgqr( m, n, n, a, lda, work( itau ),
1208 $ work( iwork ), lwork-iwork+1, ierr )
1209 ie = itau
1210 itauq = ie + n
1211 itaup = itauq + n
1212 iwork = itaup + n
1213*
1214* Bidiagonalize R in WORK(IU), copying result to
1215* WORK(IR)
1216* (Workspace: need 2*N*N+4*N,
1217* prefer 2*N*N+3*N+2*N*NB)
1218*
1219 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1220 $ work( ie ), work( itauq ),
1221 $ work( itaup ), work( iwork ),
1222 $ lwork-iwork+1, ierr )
1223 CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1224 $ work( ir ), ldwrkr )
1225*
1226* Generate left bidiagonalizing vectors in WORK(IU)
1227* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1228*
1229 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1230 $ work( itauq ), work( iwork ),
1231 $ lwork-iwork+1, ierr )
1232*
1233* Generate right bidiagonalizing vectors in WORK(IR)
1234* (Workspace: need 2*N*N+4*N-1,
1235* prefer 2*N*N+3*N+(N-1)*NB)
1236*
1237 CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1238 $ work( itaup ), work( iwork ),
1239 $ lwork-iwork+1, ierr )
1240 iwork = ie + n
1241*
1242* Perform bidiagonal QR iteration, computing left
1243* singular vectors of R in WORK(IU) and computing
1244* right singular vectors of R in WORK(IR)
1245* (Workspace: need 2*N*N+BDSPAC)
1246*
1247 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1248 $ work( ir ), ldwrkr, work( iu ),
1249 $ ldwrku, dum, 1, work( iwork ), info )
1250*
1251* Multiply Q in A by left singular vectors of R in
1252* WORK(IU), storing result in U
1253* (Workspace: need N*N)
1254*
1255 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1256 $ work( iu ), ldwrku, zero, u, ldu )
1257*
1258* Copy right singular vectors of R to A
1259* (Workspace: need N*N)
1260*
1261 CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1262 $ lda )
1263*
1264 ELSE
1265*
1266* Insufficient workspace for a fast algorithm
1267*
1268 itau = 1
1269 iwork = itau + n
1270*
1271* Compute A=Q*R, copying result to U
1272* (Workspace: need 2*N, prefer N+N*NB)
1273*
1274 CALL sgeqrf( m, n, a, lda, work( itau ),
1275 $ work( iwork ), lwork-iwork+1, ierr )
1276 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1277*
1278* Generate Q in U
1279* (Workspace: need 2*N, prefer N+N*NB)
1280*
1281 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1282 $ work( iwork ), lwork-iwork+1, ierr )
1283 ie = itau
1284 itauq = ie + n
1285 itaup = itauq + n
1286 iwork = itaup + n
1287*
1288* Zero out below R in A
1289*
1290 IF( n .GT. 1 ) THEN
1291 CALL slaset( 'L', n-1, n-1, zero, zero,
1292 $ a( 2, 1 ), lda )
1293 END IF
1294*
1295* Bidiagonalize R in A
1296* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1297*
1298 CALL sgebrd( n, n, a, lda, s, work( ie ),
1299 $ work( itauq ), work( itaup ),
1300 $ work( iwork ), lwork-iwork+1, ierr )
1301*
1302* Multiply Q in U by left vectors bidiagonalizing R
1303* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1304*
1305 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1306 $ work( itauq ), u, ldu, work( iwork ),
1307 $ lwork-iwork+1, ierr )
1308*
1309* Generate right vectors bidiagonalizing R in A
1310* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1311*
1312 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1313 $ work( iwork ), lwork-iwork+1, ierr )
1314 iwork = ie + n
1315*
1316* Perform bidiagonal QR iteration, computing left
1317* singular vectors of A in U and computing right
1318* singular vectors of A in A
1319* (Workspace: need BDSPAC)
1320*
1321 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1322 $ lda, u, ldu, dum, 1, work( iwork ),
1323 $ info )
1324*
1325 END IF
1326*
1327 ELSE IF( wntvas ) THEN
1328*
1329* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1330* or 'A')
1331* N left singular vectors to be computed in U and
1332* N right singular vectors to be computed in VT
1333*
1334 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1335*
1336* Sufficient workspace for a fast algorithm
1337*
1338 iu = 1
1339 IF( lwork.GE.wrkbl+lda*n ) THEN
1340*
1341* WORK(IU) is LDA by N
1342*
1343 ldwrku = lda
1344 ELSE
1345*
1346* WORK(IU) is N by N
1347*
1348 ldwrku = n
1349 END IF
1350 itau = iu + ldwrku*n
1351 iwork = itau + n
1352*
1353* Compute A=Q*R
1354* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1355*
1356 CALL sgeqrf( m, n, a, lda, work( itau ),
1357 $ work( iwork ), lwork-iwork+1, ierr )
1358*
1359* Copy R to WORK(IU), zeroing out below it
1360*
1361 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1362 $ ldwrku )
1363 CALL slaset( 'L', n-1, n-1, zero, zero,
1364 $ work( iu+1 ), ldwrku )
1365*
1366* Generate Q in A
1367* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1368*
1369 CALL sorgqr( m, n, n, a, lda, work( itau ),
1370 $ work( iwork ), lwork-iwork+1, ierr )
1371 ie = itau
1372 itauq = ie + n
1373 itaup = itauq + n
1374 iwork = itaup + n
1375*
1376* Bidiagonalize R in WORK(IU), copying result to VT
1377* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1378*
1379 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1380 $ work( ie ), work( itauq ),
1381 $ work( itaup ), work( iwork ),
1382 $ lwork-iwork+1, ierr )
1383 CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1384 $ ldvt )
1385*
1386* Generate left bidiagonalizing vectors in WORK(IU)
1387* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1388*
1389 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1390 $ work( itauq ), work( iwork ),
1391 $ lwork-iwork+1, ierr )
1392*
1393* Generate right bidiagonalizing vectors in VT
1394* (Workspace: need N*N+4*N-1,
1395* prefer N*N+3*N+(N-1)*NB)
1396*
1397 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1398 $ work( iwork ), lwork-iwork+1, ierr )
1399 iwork = ie + n
1400*
1401* Perform bidiagonal QR iteration, computing left
1402* singular vectors of R in WORK(IU) and computing
1403* right singular vectors of R in VT
1404* (Workspace: need N*N+BDSPAC)
1405*
1406 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1407 $ ldvt, work( iu ), ldwrku, dum, 1,
1408 $ work( iwork ), info )
1409*
1410* Multiply Q in A by left singular vectors of R in
1411* WORK(IU), storing result in U
1412* (Workspace: need N*N)
1413*
1414 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1415 $ work( iu ), ldwrku, zero, u, ldu )
1416*
1417 ELSE
1418*
1419* Insufficient workspace for a fast algorithm
1420*
1421 itau = 1
1422 iwork = itau + n
1423*
1424* Compute A=Q*R, copying result to U
1425* (Workspace: need 2*N, prefer N+N*NB)
1426*
1427 CALL sgeqrf( m, n, a, lda, work( itau ),
1428 $ work( iwork ), lwork-iwork+1, ierr )
1429 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1430*
1431* Generate Q in U
1432* (Workspace: need 2*N, prefer N+N*NB)
1433*
1434 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1435 $ work( iwork ), lwork-iwork+1, ierr )
1436*
1437* Copy R to VT, zeroing out below it
1438*
1439 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1440 IF( n.GT.1 )
1441 $ CALL slaset( 'L', n-1, n-1, zero, zero,
1442 $ vt( 2, 1 ), ldvt )
1443 ie = itau
1444 itauq = ie + n
1445 itaup = itauq + n
1446 iwork = itaup + n
1447*
1448* Bidiagonalize R in VT
1449* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1450*
1451 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1452 $ work( itauq ), work( itaup ),
1453 $ work( iwork ), lwork-iwork+1, ierr )
1454*
1455* Multiply Q in U by left bidiagonalizing vectors
1456* in VT
1457* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1458*
1459 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1460 $ work( itauq ), u, ldu, work( iwork ),
1461 $ lwork-iwork+1, ierr )
1462*
1463* Generate right bidiagonalizing vectors in VT
1464* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1465*
1466 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1467 $ work( iwork ), lwork-iwork+1, ierr )
1468 iwork = ie + n
1469*
1470* Perform bidiagonal QR iteration, computing left
1471* singular vectors of A in U and computing right
1472* singular vectors of A in VT
1473* (Workspace: need BDSPAC)
1474*
1475 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1476 $ ldvt, u, ldu, dum, 1, work( iwork ),
1477 $ info )
1478*
1479 END IF
1480*
1481 END IF
1482*
1483 ELSE IF( wntua ) THEN
1484*
1485 IF( wntvn ) THEN
1486*
1487* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1488* M left singular vectors to be computed in U and
1489* no right singular vectors to be computed
1490*
1491 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1492*
1493* Sufficient workspace for a fast algorithm
1494*
1495 ir = 1
1496 IF( lwork.GE.wrkbl+lda*n ) THEN
1497*
1498* WORK(IR) is LDA by N
1499*
1500 ldwrkr = lda
1501 ELSE
1502*
1503* WORK(IR) is N by N
1504*
1505 ldwrkr = n
1506 END IF
1507 itau = ir + ldwrkr*n
1508 iwork = itau + n
1509*
1510* Compute A=Q*R, copying result to U
1511* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1512*
1513 CALL sgeqrf( m, n, a, lda, work( itau ),
1514 $ work( iwork ), lwork-iwork+1, ierr )
1515 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1516*
1517* Copy R to WORK(IR), zeroing out below it
1518*
1519 CALL slacpy( 'U', n, n, a, lda, work( ir ),
1520 $ ldwrkr )
1521 CALL slaset( 'L', n-1, n-1, zero, zero,
1522 $ work( ir+1 ), ldwrkr )
1523*
1524* Generate Q in U
1525* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1526*
1527 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1528 $ work( iwork ), lwork-iwork+1, ierr )
1529 ie = itau
1530 itauq = ie + n
1531 itaup = itauq + n
1532 iwork = itaup + n
1533*
1534* Bidiagonalize R in WORK(IR)
1535* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1536*
1537 CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1538 $ work( ie ), work( itauq ),
1539 $ work( itaup ), work( iwork ),
1540 $ lwork-iwork+1, ierr )
1541*
1542* Generate left bidiagonalizing vectors in WORK(IR)
1543* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1544*
1545 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1546 $ work( itauq ), work( iwork ),
1547 $ lwork-iwork+1, ierr )
1548 iwork = ie + n
1549*
1550* Perform bidiagonal QR iteration, computing left
1551* singular vectors of R in WORK(IR)
1552* (Workspace: need N*N+BDSPAC)
1553*
1554 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1555 $ 1, work( ir ), ldwrkr, dum, 1,
1556 $ work( iwork ), info )
1557*
1558* Multiply Q in U by left singular vectors of R in
1559* WORK(IR), storing result in A
1560* (Workspace: need N*N)
1561*
1562 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1563 $ work( ir ), ldwrkr, zero, a, lda )
1564*
1565* Copy left singular vectors of A from A to U
1566*
1567 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1568*
1569 ELSE
1570*
1571* Insufficient workspace for a fast algorithm
1572*
1573 itau = 1
1574 iwork = itau + n
1575*
1576* Compute A=Q*R, copying result to U
1577* (Workspace: need 2*N, prefer N+N*NB)
1578*
1579 CALL sgeqrf( m, n, a, lda, work( itau ),
1580 $ work( iwork ), lwork-iwork+1, ierr )
1581 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1582*
1583* Generate Q in U
1584* (Workspace: need N+M, prefer N+M*NB)
1585*
1586 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1587 $ work( iwork ), lwork-iwork+1, ierr )
1588 ie = itau
1589 itauq = ie + n
1590 itaup = itauq + n
1591 iwork = itaup + n
1592*
1593* Zero out below R in A
1594*
1595 IF( n .GT. 1 ) THEN
1596 CALL slaset( 'L', n-1, n-1, zero, zero,
1597 $ a( 2, 1 ), lda )
1598 END IF
1599*
1600* Bidiagonalize R in A
1601* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1602*
1603 CALL sgebrd( n, n, a, lda, s, work( ie ),
1604 $ work( itauq ), work( itaup ),
1605 $ work( iwork ), lwork-iwork+1, ierr )
1606*
1607* Multiply Q in U by left bidiagonalizing vectors
1608* in A
1609* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1610*
1611 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1612 $ work( itauq ), u, ldu, work( iwork ),
1613 $ lwork-iwork+1, ierr )
1614 iwork = ie + n
1615*
1616* Perform bidiagonal QR iteration, computing left
1617* singular vectors of A in U
1618* (Workspace: need BDSPAC)
1619*
1620 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1621 $ 1, u, ldu, dum, 1, work( iwork ),
1622 $ info )
1623*
1624 END IF
1625*
1626 ELSE IF( wntvo ) THEN
1627*
1628* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1629* M left singular vectors to be computed in U and
1630* N right singular vectors to be overwritten on A
1631*
1632 IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1633*
1634* Sufficient workspace for a fast algorithm
1635*
1636 iu = 1
1637 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1638*
1639* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1640*
1641 ldwrku = lda
1642 ir = iu + ldwrku*n
1643 ldwrkr = lda
1644 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1645*
1646* WORK(IU) is LDA by N and WORK(IR) is N by N
1647*
1648 ldwrku = lda
1649 ir = iu + ldwrku*n
1650 ldwrkr = n
1651 ELSE
1652*
1653* WORK(IU) is N by N and WORK(IR) is N by N
1654*
1655 ldwrku = n
1656 ir = iu + ldwrku*n
1657 ldwrkr = n
1658 END IF
1659 itau = ir + ldwrkr*n
1660 iwork = itau + n
1661*
1662* Compute A=Q*R, copying result to U
1663* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1664*
1665 CALL sgeqrf( m, n, a, lda, work( itau ),
1666 $ work( iwork ), lwork-iwork+1, ierr )
1667 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1668*
1669* Generate Q in U
1670* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1671*
1672 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1673 $ work( iwork ), lwork-iwork+1, ierr )
1674*
1675* Copy R to WORK(IU), zeroing out below it
1676*
1677 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1678 $ ldwrku )
1679 CALL slaset( 'L', n-1, n-1, zero, zero,
1680 $ work( iu+1 ), ldwrku )
1681 ie = itau
1682 itauq = ie + n
1683 itaup = itauq + n
1684 iwork = itaup + n
1685*
1686* Bidiagonalize R in WORK(IU), copying result to
1687* WORK(IR)
1688* (Workspace: need 2*N*N+4*N,
1689* prefer 2*N*N+3*N+2*N*NB)
1690*
1691 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1692 $ work( ie ), work( itauq ),
1693 $ work( itaup ), work( iwork ),
1694 $ lwork-iwork+1, ierr )
1695 CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1696 $ work( ir ), ldwrkr )
1697*
1698* Generate left bidiagonalizing vectors in WORK(IU)
1699* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1700*
1701 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1702 $ work( itauq ), work( iwork ),
1703 $ lwork-iwork+1, ierr )
1704*
1705* Generate right bidiagonalizing vectors in WORK(IR)
1706* (Workspace: need 2*N*N+4*N-1,
1707* prefer 2*N*N+3*N+(N-1)*NB)
1708*
1709 CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1710 $ work( itaup ), work( iwork ),
1711 $ lwork-iwork+1, ierr )
1712 iwork = ie + n
1713*
1714* Perform bidiagonal QR iteration, computing left
1715* singular vectors of R in WORK(IU) and computing
1716* right singular vectors of R in WORK(IR)
1717* (Workspace: need 2*N*N+BDSPAC)
1718*
1719 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1720 $ work( ir ), ldwrkr, work( iu ),
1721 $ ldwrku, dum, 1, work( iwork ), info )
1722*
1723* Multiply Q in U by left singular vectors of R in
1724* WORK(IU), storing result in A
1725* (Workspace: need N*N)
1726*
1727 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1728 $ work( iu ), ldwrku, zero, a, lda )
1729*
1730* Copy left singular vectors of A from A to U
1731*
1732 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1733*
1734* Copy right singular vectors of R from WORK(IR) to A
1735*
1736 CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1737 $ lda )
1738*
1739 ELSE
1740*
1741* Insufficient workspace for a fast algorithm
1742*
1743 itau = 1
1744 iwork = itau + n
1745*
1746* Compute A=Q*R, copying result to U
1747* (Workspace: need 2*N, prefer N+N*NB)
1748*
1749 CALL sgeqrf( m, n, a, lda, work( itau ),
1750 $ work( iwork ), lwork-iwork+1, ierr )
1751 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1752*
1753* Generate Q in U
1754* (Workspace: need N+M, prefer N+M*NB)
1755*
1756 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1757 $ work( iwork ), lwork-iwork+1, ierr )
1758 ie = itau
1759 itauq = ie + n
1760 itaup = itauq + n
1761 iwork = itaup + n
1762*
1763* Zero out below R in A
1764*
1765 IF( n .GT. 1 ) THEN
1766 CALL slaset( 'L', n-1, n-1, zero, zero,
1767 $ a( 2, 1 ), lda )
1768 END IF
1769*
1770* Bidiagonalize R in A
1771* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1772*
1773 CALL sgebrd( n, n, a, lda, s, work( ie ),
1774 $ work( itauq ), work( itaup ),
1775 $ work( iwork ), lwork-iwork+1, ierr )
1776*
1777* Multiply Q in U by left bidiagonalizing vectors
1778* in A
1779* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1780*
1781 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1782 $ work( itauq ), u, ldu, work( iwork ),
1783 $ lwork-iwork+1, ierr )
1784*
1785* Generate right bidiagonalizing vectors in A
1786* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1787*
1788 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1789 $ work( iwork ), lwork-iwork+1, ierr )
1790 iwork = ie + n
1791*
1792* Perform bidiagonal QR iteration, computing left
1793* singular vectors of A in U and computing right
1794* singular vectors of A in A
1795* (Workspace: need BDSPAC)
1796*
1797 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1798 $ lda, u, ldu, dum, 1, work( iwork ),
1799 $ info )
1800*
1801 END IF
1802*
1803 ELSE IF( wntvas ) THEN
1804*
1805* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1806* or 'A')
1807* M left singular vectors to be computed in U and
1808* N right singular vectors to be computed in VT
1809*
1810 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1811*
1812* Sufficient workspace for a fast algorithm
1813*
1814 iu = 1
1815 IF( lwork.GE.wrkbl+lda*n ) THEN
1816*
1817* WORK(IU) is LDA by N
1818*
1819 ldwrku = lda
1820 ELSE
1821*
1822* WORK(IU) is N by N
1823*
1824 ldwrku = n
1825 END IF
1826 itau = iu + ldwrku*n
1827 iwork = itau + n
1828*
1829* Compute A=Q*R, copying result to U
1830* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1831*
1832 CALL sgeqrf( m, n, a, lda, work( itau ),
1833 $ work( iwork ), lwork-iwork+1, ierr )
1834 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1835*
1836* Generate Q in U
1837* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1838*
1839 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1840 $ work( iwork ), lwork-iwork+1, ierr )
1841*
1842* Copy R to WORK(IU), zeroing out below it
1843*
1844 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1845 $ ldwrku )
1846 CALL slaset( 'L', n-1, n-1, zero, zero,
1847 $ work( iu+1 ), ldwrku )
1848 ie = itau
1849 itauq = ie + n
1850 itaup = itauq + n
1851 iwork = itaup + n
1852*
1853* Bidiagonalize R in WORK(IU), copying result to VT
1854* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1855*
1856 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1857 $ work( ie ), work( itauq ),
1858 $ work( itaup ), work( iwork ),
1859 $ lwork-iwork+1, ierr )
1860 CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1861 $ ldvt )
1862*
1863* Generate left bidiagonalizing vectors in WORK(IU)
1864* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1865*
1866 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1867 $ work( itauq ), work( iwork ),
1868 $ lwork-iwork+1, ierr )
1869*
1870* Generate right bidiagonalizing vectors in VT
1871* (Workspace: need N*N+4*N-1,
1872* prefer N*N+3*N+(N-1)*NB)
1873*
1874 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1875 $ work( iwork ), lwork-iwork+1, ierr )
1876 iwork = ie + n
1877*
1878* Perform bidiagonal QR iteration, computing left
1879* singular vectors of R in WORK(IU) and computing
1880* right singular vectors of R in VT
1881* (Workspace: need N*N+BDSPAC)
1882*
1883 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1884 $ ldvt, work( iu ), ldwrku, dum, 1,
1885 $ work( iwork ), info )
1886*
1887* Multiply Q in U by left singular vectors of R in
1888* WORK(IU), storing result in A
1889* (Workspace: need N*N)
1890*
1891 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1892 $ work( iu ), ldwrku, zero, a, lda )
1893*
1894* Copy left singular vectors of A from A to U
1895*
1896 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1897*
1898 ELSE
1899*
1900* Insufficient workspace for a fast algorithm
1901*
1902 itau = 1
1903 iwork = itau + n
1904*
1905* Compute A=Q*R, copying result to U
1906* (Workspace: need 2*N, prefer N+N*NB)
1907*
1908 CALL sgeqrf( m, n, a, lda, work( itau ),
1909 $ work( iwork ), lwork-iwork+1, ierr )
1910 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1911*
1912* Generate Q in U
1913* (Workspace: need N+M, prefer N+M*NB)
1914*
1915 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1916 $ work( iwork ), lwork-iwork+1, ierr )
1917*
1918* Copy R from A to VT, zeroing out below it
1919*
1920 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1921 IF( n.GT.1 )
1922 $ CALL slaset( 'L', n-1, n-1, zero, zero,
1923 $ vt( 2, 1 ), ldvt )
1924 ie = itau
1925 itauq = ie + n
1926 itaup = itauq + n
1927 iwork = itaup + n
1928*
1929* Bidiagonalize R in VT
1930* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1931*
1932 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1933 $ work( itauq ), work( itaup ),
1934 $ work( iwork ), lwork-iwork+1, ierr )
1935*
1936* Multiply Q in U by left bidiagonalizing vectors
1937* in VT
1938* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1939*
1940 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1941 $ work( itauq ), u, ldu, work( iwork ),
1942 $ lwork-iwork+1, ierr )
1943*
1944* Generate right bidiagonalizing vectors in VT
1945* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1946*
1947 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1948 $ work( iwork ), lwork-iwork+1, ierr )
1949 iwork = ie + n
1950*
1951* Perform bidiagonal QR iteration, computing left
1952* singular vectors of A in U and computing right
1953* singular vectors of A in VT
1954* (Workspace: need BDSPAC)
1955*
1956 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1957 $ ldvt, u, ldu, dum, 1, work( iwork ),
1958 $ info )
1959*
1960 END IF
1961*
1962 END IF
1963*
1964 END IF
1965*
1966 ELSE
1967*
1968* M .LT. MNTHR
1969*
1970* Path 10 (M at least N, but not much larger)
1971* Reduce to bidiagonal form without QR decomposition
1972*
1973 ie = 1
1974 itauq = ie + n
1975 itaup = itauq + n
1976 iwork = itaup + n
1977*
1978* Bidiagonalize A
1979* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1980*
1981 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1982 $ work( itaup ), work( iwork ), lwork-iwork+1,
1983 $ ierr )
1984 IF( wntuas ) THEN
1985*
1986* If left singular vectors desired in U, copy result to U
1987* and generate left bidiagonalizing vectors in U
1988* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1989*
1990 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1991 IF( wntus )
1992 $ ncu = n
1993 IF( wntua )
1994 $ ncu = m
1995 CALL sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1996 $ work( iwork ), lwork-iwork+1, ierr )
1997 END IF
1998 IF( wntvas ) THEN
1999*
2000* If right singular vectors desired in VT, copy result to
2001* VT and generate right bidiagonalizing vectors in VT
2002* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2003*
2004 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
2005 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2006 $ work( iwork ), lwork-iwork+1, ierr )
2007 END IF
2008 IF( wntuo ) THEN
2009*
2010* If left singular vectors desired in A, generate left
2011* bidiagonalizing vectors in A
2012* (Workspace: need 4*N, prefer 3*N+N*NB)
2013*
2014 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2015 $ work( iwork ), lwork-iwork+1, ierr )
2016 END IF
2017 IF( wntvo ) THEN
2018*
2019* If right singular vectors desired in A, generate right
2020* bidiagonalizing vectors in A
2021* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2022*
2023 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
2024 $ work( iwork ), lwork-iwork+1, ierr )
2025 END IF
2026 iwork = ie + n
2027 IF( wntuas .OR. wntuo )
2028 $ nru = m
2029 IF( wntun )
2030 $ nru = 0
2031 IF( wntvas .OR. wntvo )
2032 $ ncvt = n
2033 IF( wntvn )
2034 $ ncvt = 0
2035 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2036*
2037* Perform bidiagonal QR iteration, if desired, computing
2038* left singular vectors in U and computing right singular
2039* vectors in VT
2040* (Workspace: need BDSPAC)
2041*
2042 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2043 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2044 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2045*
2046* Perform bidiagonal QR iteration, if desired, computing
2047* left singular vectors in U and computing right singular
2048* vectors in A
2049* (Workspace: need BDSPAC)
2050*
2051 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2052 $ u, ldu, dum, 1, work( iwork ), info )
2053 ELSE
2054*
2055* Perform bidiagonal QR iteration, if desired, computing
2056* left singular vectors in A and computing right singular
2057* vectors in VT
2058* (Workspace: need BDSPAC)
2059*
2060 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2061 $ ldvt, a, lda, dum, 1, work( iwork ), info )
2062 END IF
2063*
2064 END IF
2065*
2066 ELSE
2067*
2068* A has more columns than rows. If A has sufficiently more
2069* columns than rows, first reduce using the LQ decomposition (if
2070* sufficient workspace available)
2071*
2072 IF( n.GE.mnthr ) THEN
2073*
2074 IF( wntvn ) THEN
2075*
2076* Path 1t(N much larger than M, JOBVT='N')
2077* No right singular vectors to be computed
2078*
2079 itau = 1
2080 iwork = itau + m
2081*
2082* Compute A=L*Q
2083* (Workspace: need 2*M, prefer M+M*NB)
2084*
2085 CALL sgelqf( m, n, a, lda, work( itau ), work( iwork ),
2086 $ lwork-iwork+1, ierr )
2087*
2088* Zero out above L
2089*
2090 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2091 ie = 1
2092 itauq = ie + m
2093 itaup = itauq + m
2094 iwork = itaup + m
2095*
2096* Bidiagonalize L in A
2097* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2098*
2099 CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2100 $ work( itaup ), work( iwork ), lwork-iwork+1,
2101 $ ierr )
2102 IF( wntuo .OR. wntuas ) THEN
2103*
2104* If left singular vectors desired, generate Q
2105* (Workspace: need 4*M, prefer 3*M+M*NB)
2106*
2107 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2108 $ work( iwork ), lwork-iwork+1, ierr )
2109 END IF
2110 iwork = ie + m
2111 nru = 0
2112 IF( wntuo .OR. wntuas )
2113 $ nru = m
2114*
2115* Perform bidiagonal QR iteration, computing left singular
2116* vectors of A in A if desired
2117* (Workspace: need BDSPAC)
2118*
2119 CALL sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2120 $ lda, dum, 1, work( iwork ), info )
2121*
2122* If left singular vectors desired in U, copy them there
2123*
2124 IF( wntuas )
2125 $ CALL slacpy( 'F', m, m, a, lda, u, ldu )
2126*
2127 ELSE IF( wntvo .AND. wntun ) THEN
2128*
2129* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2130* M right singular vectors to be overwritten on A and
2131* no left singular vectors to be computed
2132*
2133 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2134*
2135* Sufficient workspace for a fast algorithm
2136*
2137 ir = 1
2138 IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2139*
2140* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2141*
2142 ldwrku = lda
2143 chunk = n
2144 ldwrkr = lda
2145 ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2146*
2147* WORK(IU) is LDA by N and WORK(IR) is M by M
2148*
2149 ldwrku = lda
2150 chunk = n
2151 ldwrkr = m
2152 ELSE
2153*
2154* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2155*
2156 ldwrku = m
2157 chunk = ( lwork-m*m-m ) / m
2158 ldwrkr = m
2159 END IF
2160 itau = ir + ldwrkr*m
2161 iwork = itau + m
2162*
2163* Compute A=L*Q
2164* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2165*
2166 CALL sgelqf( m, n, a, lda, work( itau ),
2167 $ work( iwork ), lwork-iwork+1, ierr )
2168*
2169* Copy L to WORK(IR) and zero out above it
2170*
2171 CALL slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2172 CALL slaset( 'U', m-1, m-1, zero, zero,
2173 $ work( ir+ldwrkr ), ldwrkr )
2174*
2175* Generate Q in A
2176* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2177*
2178 CALL sorglq( m, n, m, a, lda, work( itau ),
2179 $ work( iwork ), lwork-iwork+1, ierr )
2180 ie = itau
2181 itauq = ie + m
2182 itaup = itauq + m
2183 iwork = itaup + m
2184*
2185* Bidiagonalize L in WORK(IR)
2186* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2187*
2188 CALL sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2189 $ work( itauq ), work( itaup ),
2190 $ work( iwork ), lwork-iwork+1, ierr )
2191*
2192* Generate right vectors bidiagonalizing L
2193* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2194*
2195 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2196 $ work( itaup ), work( iwork ),
2197 $ lwork-iwork+1, ierr )
2198 iwork = ie + m
2199*
2200* Perform bidiagonal QR iteration, computing right
2201* singular vectors of L in WORK(IR)
2202* (Workspace: need M*M+BDSPAC)
2203*
2204 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2205 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2206 $ work( iwork ), info )
2207 iu = ie + m
2208*
2209* Multiply right singular vectors of L in WORK(IR) by Q
2210* in A, storing result in WORK(IU) and copying to A
2211* (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2212*
2213 DO 30 i = 1, n, chunk
2214 blk = min( n-i+1, chunk )
2215 CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2216 $ ldwrkr, a( 1, i ), lda, zero,
2217 $ work( iu ), ldwrku )
2218 CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2219 $ a( 1, i ), lda )
2220 30 CONTINUE
2221*
2222 ELSE
2223*
2224* Insufficient workspace for a fast algorithm
2225*
2226 ie = 1
2227 itauq = ie + m
2228 itaup = itauq + m
2229 iwork = itaup + m
2230*
2231* Bidiagonalize A
2232* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2233*
2234 CALL sgebrd( m, n, a, lda, s, work( ie ),
2235 $ work( itauq ), work( itaup ),
2236 $ work( iwork ), lwork-iwork+1, ierr )
2237*
2238* Generate right vectors bidiagonalizing A
2239* (Workspace: need 4*M, prefer 3*M+M*NB)
2240*
2241 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
2242 $ work( iwork ), lwork-iwork+1, ierr )
2243 iwork = ie + m
2244*
2245* Perform bidiagonal QR iteration, computing right
2246* singular vectors of A in A
2247* (Workspace: need BDSPAC)
2248*
2249 CALL sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2250 $ dum, 1, dum, 1, work( iwork ), info )
2251*
2252 END IF
2253*
2254 ELSE IF( wntvo .AND. wntuas ) THEN
2255*
2256* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2257* M right singular vectors to be overwritten on A and
2258* M left singular vectors to be computed in U
2259*
2260 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2261*
2262* Sufficient workspace for a fast algorithm
2263*
2264 ir = 1
2265 IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2266*
2267* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2268*
2269 ldwrku = lda
2270 chunk = n
2271 ldwrkr = lda
2272 ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2273*
2274* WORK(IU) is LDA by N and WORK(IR) is M by M
2275*
2276 ldwrku = lda
2277 chunk = n
2278 ldwrkr = m
2279 ELSE
2280*
2281* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2282*
2283 ldwrku = m
2284 chunk = ( lwork-m*m-m ) / m
2285 ldwrkr = m
2286 END IF
2287 itau = ir + ldwrkr*m
2288 iwork = itau + m
2289*
2290* Compute A=L*Q
2291* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2292*
2293 CALL sgelqf( m, n, a, lda, work( itau ),
2294 $ work( iwork ), lwork-iwork+1, ierr )
2295*
2296* Copy L to U, zeroing about above it
2297*
2298 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2299 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2300 $ ldu )
2301*
2302* Generate Q in A
2303* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2304*
2305 CALL sorglq( m, n, m, a, lda, work( itau ),
2306 $ work( iwork ), lwork-iwork+1, ierr )
2307 ie = itau
2308 itauq = ie + m
2309 itaup = itauq + m
2310 iwork = itaup + m
2311*
2312* Bidiagonalize L in U, copying result to WORK(IR)
2313* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2314*
2315 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2316 $ work( itauq ), work( itaup ),
2317 $ work( iwork ), lwork-iwork+1, ierr )
2318 CALL slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2319*
2320* Generate right vectors bidiagonalizing L in WORK(IR)
2321* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2322*
2323 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2324 $ work( itaup ), work( iwork ),
2325 $ lwork-iwork+1, ierr )
2326*
2327* Generate left vectors bidiagonalizing L in U
2328* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2329*
2330 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2331 $ work( iwork ), lwork-iwork+1, ierr )
2332 iwork = ie + m
2333*
2334* Perform bidiagonal QR iteration, computing left
2335* singular vectors of L in U, and computing right
2336* singular vectors of L in WORK(IR)
2337* (Workspace: need M*M+BDSPAC)
2338*
2339 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2340 $ work( ir ), ldwrkr, u, ldu, dum, 1,
2341 $ work( iwork ), info )
2342 iu = ie + m
2343*
2344* Multiply right singular vectors of L in WORK(IR) by Q
2345* in A, storing result in WORK(IU) and copying to A
2346* (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2347*
2348 DO 40 i = 1, n, chunk
2349 blk = min( n-i+1, chunk )
2350 CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2351 $ ldwrkr, a( 1, i ), lda, zero,
2352 $ work( iu ), ldwrku )
2353 CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2354 $ a( 1, i ), lda )
2355 40 CONTINUE
2356*
2357 ELSE
2358*
2359* Insufficient workspace for a fast algorithm
2360*
2361 itau = 1
2362 iwork = itau + m
2363*
2364* Compute A=L*Q
2365* (Workspace: need 2*M, prefer M+M*NB)
2366*
2367 CALL sgelqf( m, n, a, lda, work( itau ),
2368 $ work( iwork ), lwork-iwork+1, ierr )
2369*
2370* Copy L to U, zeroing out above it
2371*
2372 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2373 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2374 $ ldu )
2375*
2376* Generate Q in A
2377* (Workspace: need 2*M, prefer M+M*NB)
2378*
2379 CALL sorglq( m, n, m, a, lda, work( itau ),
2380 $ work( iwork ), lwork-iwork+1, ierr )
2381 ie = itau
2382 itauq = ie + m
2383 itaup = itauq + m
2384 iwork = itaup + m
2385*
2386* Bidiagonalize L in U
2387* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2388*
2389 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2390 $ work( itauq ), work( itaup ),
2391 $ work( iwork ), lwork-iwork+1, ierr )
2392*
2393* Multiply right vectors bidiagonalizing L by Q in A
2394* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2395*
2396 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2397 $ work( itaup ), a, lda, work( iwork ),
2398 $ lwork-iwork+1, ierr )
2399*
2400* Generate left vectors bidiagonalizing L in U
2401* (Workspace: need 4*M, prefer 3*M+M*NB)
2402*
2403 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2404 $ work( iwork ), lwork-iwork+1, ierr )
2405 iwork = ie + m
2406*
2407* Perform bidiagonal QR iteration, computing left
2408* singular vectors of A in U and computing right
2409* singular vectors of A in A
2410* (Workspace: need BDSPAC)
2411*
2412 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2413 $ u, ldu, dum, 1, work( iwork ), info )
2414*
2415 END IF
2416*
2417 ELSE IF( wntvs ) THEN
2418*
2419 IF( wntun ) THEN
2420*
2421* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2422* M right singular vectors to be computed in VT and
2423* no left singular vectors to be computed
2424*
2425 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2426*
2427* Sufficient workspace for a fast algorithm
2428*
2429 ir = 1
2430 IF( lwork.GE.wrkbl+lda*m ) THEN
2431*
2432* WORK(IR) is LDA by M
2433*
2434 ldwrkr = lda
2435 ELSE
2436*
2437* WORK(IR) is M by M
2438*
2439 ldwrkr = m
2440 END IF
2441 itau = ir + ldwrkr*m
2442 iwork = itau + m
2443*
2444* Compute A=L*Q
2445* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2446*
2447 CALL sgelqf( m, n, a, lda, work( itau ),
2448 $ work( iwork ), lwork-iwork+1, ierr )
2449*
2450* Copy L to WORK(IR), zeroing out above it
2451*
2452 CALL slacpy( 'L', m, m, a, lda, work( ir ),
2453 $ ldwrkr )
2454 CALL slaset( 'U', m-1, m-1, zero, zero,
2455 $ work( ir+ldwrkr ), ldwrkr )
2456*
2457* Generate Q in A
2458* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2459*
2460 CALL sorglq( m, n, m, a, lda, work( itau ),
2461 $ work( iwork ), lwork-iwork+1, ierr )
2462 ie = itau
2463 itauq = ie + m
2464 itaup = itauq + m
2465 iwork = itaup + m
2466*
2467* Bidiagonalize L in WORK(IR)
2468* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2469*
2470 CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2471 $ work( ie ), work( itauq ),
2472 $ work( itaup ), work( iwork ),
2473 $ lwork-iwork+1, ierr )
2474*
2475* Generate right vectors bidiagonalizing L in
2476* WORK(IR)
2477* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2478*
2479 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2480 $ work( itaup ), work( iwork ),
2481 $ lwork-iwork+1, ierr )
2482 iwork = ie + m
2483*
2484* Perform bidiagonal QR iteration, computing right
2485* singular vectors of L in WORK(IR)
2486* (Workspace: need M*M+BDSPAC)
2487*
2488 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2489 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2490 $ work( iwork ), info )
2491*
2492* Multiply right singular vectors of L in WORK(IR) by
2493* Q in A, storing result in VT
2494* (Workspace: need M*M)
2495*
2496 CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2497 $ ldwrkr, a, lda, zero, vt, ldvt )
2498*
2499 ELSE
2500*
2501* Insufficient workspace for a fast algorithm
2502*
2503 itau = 1
2504 iwork = itau + m
2505*
2506* Compute A=L*Q
2507* (Workspace: need 2*M, prefer M+M*NB)
2508*
2509 CALL sgelqf( m, n, a, lda, work( itau ),
2510 $ work( iwork ), lwork-iwork+1, ierr )
2511*
2512* Copy result to VT
2513*
2514 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2515*
2516* Generate Q in VT
2517* (Workspace: need 2*M, prefer M+M*NB)
2518*
2519 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2520 $ work( iwork ), lwork-iwork+1, ierr )
2521 ie = itau
2522 itauq = ie + m
2523 itaup = itauq + m
2524 iwork = itaup + m
2525*
2526* Zero out above L in A
2527*
2528 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2529 $ lda )
2530*
2531* Bidiagonalize L in A
2532* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2533*
2534 CALL sgebrd( m, m, a, lda, s, work( ie ),
2535 $ work( itauq ), work( itaup ),
2536 $ work( iwork ), lwork-iwork+1, ierr )
2537*
2538* Multiply right vectors bidiagonalizing L by Q in VT
2539* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2540*
2541 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2542 $ work( itaup ), vt, ldvt,
2543 $ work( iwork ), lwork-iwork+1, ierr )
2544 iwork = ie + m
2545*
2546* Perform bidiagonal QR iteration, computing right
2547* singular vectors of A in VT
2548* (Workspace: need BDSPAC)
2549*
2550 CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2551 $ ldvt, dum, 1, dum, 1, work( iwork ),
2552 $ info )
2553*
2554 END IF
2555*
2556 ELSE IF( wntuo ) THEN
2557*
2558* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2559* M right singular vectors to be computed in VT and
2560* M left singular vectors to be overwritten on A
2561*
2562 IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2563*
2564* Sufficient workspace for a fast algorithm
2565*
2566 iu = 1
2567 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2568*
2569* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2570*
2571 ldwrku = lda
2572 ir = iu + ldwrku*m
2573 ldwrkr = lda
2574 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2575*
2576* WORK(IU) is LDA by M and WORK(IR) is M by M
2577*
2578 ldwrku = lda
2579 ir = iu + ldwrku*m
2580 ldwrkr = m
2581 ELSE
2582*
2583* WORK(IU) is M by M and WORK(IR) is M by M
2584*
2585 ldwrku = m
2586 ir = iu + ldwrku*m
2587 ldwrkr = m
2588 END IF
2589 itau = ir + ldwrkr*m
2590 iwork = itau + m
2591*
2592* Compute A=L*Q
2593* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2594*
2595 CALL sgelqf( m, n, a, lda, work( itau ),
2596 $ work( iwork ), lwork-iwork+1, ierr )
2597*
2598* Copy L to WORK(IU), zeroing out below it
2599*
2600 CALL slacpy( 'L', m, m, a, lda, work( iu ),
2601 $ ldwrku )
2602 CALL slaset( 'U', m-1, m-1, zero, zero,
2603 $ work( iu+ldwrku ), ldwrku )
2604*
2605* Generate Q in A
2606* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2607*
2608 CALL sorglq( m, n, m, a, lda, work( itau ),
2609 $ work( iwork ), lwork-iwork+1, ierr )
2610 ie = itau
2611 itauq = ie + m
2612 itaup = itauq + m
2613 iwork = itaup + m
2614*
2615* Bidiagonalize L in WORK(IU), copying result to
2616* WORK(IR)
2617* (Workspace: need 2*M*M+4*M,
2618* prefer 2*M*M+3*M+2*M*NB)
2619*
2620 CALL sgebrd( m, m, work( iu ), ldwrku, s,
2621 $ work( ie ), work( itauq ),
2622 $ work( itaup ), work( iwork ),
2623 $ lwork-iwork+1, ierr )
2624 CALL slacpy( 'L', m, m, work( iu ), ldwrku,
2625 $ work( ir ), ldwrkr )
2626*
2627* Generate right bidiagonalizing vectors in WORK(IU)
2628* (Workspace: need 2*M*M+4*M-1,
2629* prefer 2*M*M+3*M+(M-1)*NB)
2630*
2631 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2632 $ work( itaup ), work( iwork ),
2633 $ lwork-iwork+1, ierr )
2634*
2635* Generate left bidiagonalizing vectors in WORK(IR)
2636* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2637*
2638 CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2639 $ work( itauq ), work( iwork ),
2640 $ lwork-iwork+1, ierr )
2641 iwork = ie + m
2642*
2643* Perform bidiagonal QR iteration, computing left
2644* singular vectors of L in WORK(IR) and computing
2645* right singular vectors of L in WORK(IU)
2646* (Workspace: need 2*M*M+BDSPAC)
2647*
2648 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2649 $ work( iu ), ldwrku, work( ir ),
2650 $ ldwrkr, dum, 1, work( iwork ), info )
2651*
2652* Multiply right singular vectors of L in WORK(IU) by
2653* Q in A, storing result in VT
2654* (Workspace: need M*M)
2655*
2656 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2657 $ ldwrku, a, lda, zero, vt, ldvt )
2658*
2659* Copy left singular vectors of L to A
2660* (Workspace: need M*M)
2661*
2662 CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
2663 $ lda )
2664*
2665 ELSE
2666*
2667* Insufficient workspace for a fast algorithm
2668*
2669 itau = 1
2670 iwork = itau + m
2671*
2672* Compute A=L*Q, copying result to VT
2673* (Workspace: need 2*M, prefer M+M*NB)
2674*
2675 CALL sgelqf( m, n, a, lda, work( itau ),
2676 $ work( iwork ), lwork-iwork+1, ierr )
2677 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2678*
2679* Generate Q in VT
2680* (Workspace: need 2*M, prefer M+M*NB)
2681*
2682 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2683 $ work( iwork ), lwork-iwork+1, ierr )
2684 ie = itau
2685 itauq = ie + m
2686 itaup = itauq + m
2687 iwork = itaup + m
2688*
2689* Zero out above L in A
2690*
2691 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2692 $ lda )
2693*
2694* Bidiagonalize L in A
2695* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2696*
2697 CALL sgebrd( m, m, a, lda, s, work( ie ),
2698 $ work( itauq ), work( itaup ),
2699 $ work( iwork ), lwork-iwork+1, ierr )
2700*
2701* Multiply right vectors bidiagonalizing L by Q in VT
2702* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2703*
2704 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2705 $ work( itaup ), vt, ldvt,
2706 $ work( iwork ), lwork-iwork+1, ierr )
2707*
2708* Generate left bidiagonalizing vectors of L in A
2709* (Workspace: need 4*M, prefer 3*M+M*NB)
2710*
2711 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2712 $ work( iwork ), lwork-iwork+1, ierr )
2713 iwork = ie + m
2714*
2715* Perform bidiagonal QR iteration, compute left
2716* singular vectors of A in A and compute right
2717* singular vectors of A in VT
2718* (Workspace: need BDSPAC)
2719*
2720 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2721 $ ldvt, a, lda, dum, 1, work( iwork ),
2722 $ info )
2723*
2724 END IF
2725*
2726 ELSE IF( wntuas ) THEN
2727*
2728* Path 6t(N much larger than M, JOBU='S' or 'A',
2729* JOBVT='S')
2730* M right singular vectors to be computed in VT and
2731* M left singular vectors to be computed in U
2732*
2733 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2734*
2735* Sufficient workspace for a fast algorithm
2736*
2737 iu = 1
2738 IF( lwork.GE.wrkbl+lda*m ) THEN
2739*
2740* WORK(IU) is LDA by N
2741*
2742 ldwrku = lda
2743 ELSE
2744*
2745* WORK(IU) is LDA by M
2746*
2747 ldwrku = m
2748 END IF
2749 itau = iu + ldwrku*m
2750 iwork = itau + m
2751*
2752* Compute A=L*Q
2753* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2754*
2755 CALL sgelqf( m, n, a, lda, work( itau ),
2756 $ work( iwork ), lwork-iwork+1, ierr )
2757*
2758* Copy L to WORK(IU), zeroing out above it
2759*
2760 CALL slacpy( 'L', m, m, a, lda, work( iu ),
2761 $ ldwrku )
2762 CALL slaset( 'U', m-1, m-1, zero, zero,
2763 $ work( iu+ldwrku ), ldwrku )
2764*
2765* Generate Q in A
2766* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2767*
2768 CALL sorglq( m, n, m, a, lda, work( itau ),
2769 $ work( iwork ), lwork-iwork+1, ierr )
2770 ie = itau
2771 itauq = ie + m
2772 itaup = itauq + m
2773 iwork = itaup + m
2774*
2775* Bidiagonalize L in WORK(IU), copying result to U
2776* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2777*
2778 CALL sgebrd( m, m, work( iu ), ldwrku, s,
2779 $ work( ie ), work( itauq ),
2780 $ work( itaup ), work( iwork ),
2781 $ lwork-iwork+1, ierr )
2782 CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
2783 $ ldu )
2784*
2785* Generate right bidiagonalizing vectors in WORK(IU)
2786* (Workspace: need M*M+4*M-1,
2787* prefer M*M+3*M+(M-1)*NB)
2788*
2789 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2790 $ work( itaup ), work( iwork ),
2791 $ lwork-iwork+1, ierr )
2792*
2793* Generate left bidiagonalizing vectors in U
2794* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2795*
2796 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2797 $ work( iwork ), lwork-iwork+1, ierr )
2798 iwork = ie + m
2799*
2800* Perform bidiagonal QR iteration, computing left
2801* singular vectors of L in U and computing right
2802* singular vectors of L in WORK(IU)
2803* (Workspace: need M*M+BDSPAC)
2804*
2805 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2806 $ work( iu ), ldwrku, u, ldu, dum, 1,
2807 $ work( iwork ), info )
2808*
2809* Multiply right singular vectors of L in WORK(IU) by
2810* Q in A, storing result in VT
2811* (Workspace: need M*M)
2812*
2813 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2814 $ ldwrku, a, lda, zero, vt, ldvt )
2815*
2816 ELSE
2817*
2818* Insufficient workspace for a fast algorithm
2819*
2820 itau = 1
2821 iwork = itau + m
2822*
2823* Compute A=L*Q, copying result to VT
2824* (Workspace: need 2*M, prefer M+M*NB)
2825*
2826 CALL sgelqf( m, n, a, lda, work( itau ),
2827 $ work( iwork ), lwork-iwork+1, ierr )
2828 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2829*
2830* Generate Q in VT
2831* (Workspace: need 2*M, prefer M+M*NB)
2832*
2833 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2834 $ work( iwork ), lwork-iwork+1, ierr )
2835*
2836* Copy L to U, zeroing out above it
2837*
2838 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2839 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2840 $ ldu )
2841 ie = itau
2842 itauq = ie + m
2843 itaup = itauq + m
2844 iwork = itaup + m
2845*
2846* Bidiagonalize L in U
2847* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2848*
2849 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2850 $ work( itauq ), work( itaup ),
2851 $ work( iwork ), lwork-iwork+1, ierr )
2852*
2853* Multiply right bidiagonalizing vectors in U by Q
2854* in VT
2855* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2856*
2857 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2858 $ work( itaup ), vt, ldvt,
2859 $ work( iwork ), lwork-iwork+1, ierr )
2860*
2861* Generate left bidiagonalizing vectors in U
2862* (Workspace: need 4*M, prefer 3*M+M*NB)
2863*
2864 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2865 $ work( iwork ), lwork-iwork+1, ierr )
2866 iwork = ie + m
2867*
2868* Perform bidiagonal QR iteration, computing left
2869* singular vectors of A in U and computing right
2870* singular vectors of A in VT
2871* (Workspace: need BDSPAC)
2872*
2873 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2874 $ ldvt, u, ldu, dum, 1, work( iwork ),
2875 $ info )
2876*
2877 END IF
2878*
2879 END IF
2880*
2881 ELSE IF( wntva ) THEN
2882*
2883 IF( wntun ) THEN
2884*
2885* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2886* N right singular vectors to be computed in VT and
2887* no left singular vectors to be computed
2888*
2889 IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2890*
2891* Sufficient workspace for a fast algorithm
2892*
2893 ir = 1
2894 IF( lwork.GE.wrkbl+lda*m ) THEN
2895*
2896* WORK(IR) is LDA by M
2897*
2898 ldwrkr = lda
2899 ELSE
2900*
2901* WORK(IR) is M by M
2902*
2903 ldwrkr = m
2904 END IF
2905 itau = ir + ldwrkr*m
2906 iwork = itau + m
2907*
2908* Compute A=L*Q, copying result to VT
2909* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2910*
2911 CALL sgelqf( m, n, a, lda, work( itau ),
2912 $ work( iwork ), lwork-iwork+1, ierr )
2913 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2914*
2915* Copy L to WORK(IR), zeroing out above it
2916*
2917 CALL slacpy( 'L', m, m, a, lda, work( ir ),
2918 $ ldwrkr )
2919 CALL slaset( 'U', m-1, m-1, zero, zero,
2920 $ work( ir+ldwrkr ), ldwrkr )
2921*
2922* Generate Q in VT
2923* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2924*
2925 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2926 $ work( iwork ), lwork-iwork+1, ierr )
2927 ie = itau
2928 itauq = ie + m
2929 itaup = itauq + m
2930 iwork = itaup + m
2931*
2932* Bidiagonalize L in WORK(IR)
2933* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2934*
2935 CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2936 $ work( ie ), work( itauq ),
2937 $ work( itaup ), work( iwork ),
2938 $ lwork-iwork+1, ierr )
2939*
2940* Generate right bidiagonalizing vectors in WORK(IR)
2941* (Workspace: need M*M+4*M-1,
2942* prefer M*M+3*M+(M-1)*NB)
2943*
2944 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2945 $ work( itaup ), work( iwork ),
2946 $ lwork-iwork+1, ierr )
2947 iwork = ie + m
2948*
2949* Perform bidiagonal QR iteration, computing right
2950* singular vectors of L in WORK(IR)
2951* (Workspace: need M*M+BDSPAC)
2952*
2953 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2954 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2955 $ work( iwork ), info )
2956*
2957* Multiply right singular vectors of L in WORK(IR) by
2958* Q in VT, storing result in A
2959* (Workspace: need M*M)
2960*
2961 CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2962 $ ldwrkr, vt, ldvt, zero, a, lda )
2963*
2964* Copy right singular vectors of A from A to VT
2965*
2966 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
2967*
2968 ELSE
2969*
2970* Insufficient workspace for a fast algorithm
2971*
2972 itau = 1
2973 iwork = itau + m
2974*
2975* Compute A=L*Q, copying result to VT
2976* (Workspace: need 2*M, prefer M+M*NB)
2977*
2978 CALL sgelqf( m, n, a, lda, work( itau ),
2979 $ work( iwork ), lwork-iwork+1, ierr )
2980 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2981*
2982* Generate Q in VT
2983* (Workspace: need M+N, prefer M+N*NB)
2984*
2985 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2986 $ work( iwork ), lwork-iwork+1, ierr )
2987 ie = itau
2988 itauq = ie + m
2989 itaup = itauq + m
2990 iwork = itaup + m
2991*
2992* Zero out above L in A
2993*
2994 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2995 $ lda )
2996*
2997* Bidiagonalize L in A
2998* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2999*
3000 CALL sgebrd( m, m, a, lda, s, work( ie ),
3001 $ work( itauq ), work( itaup ),
3002 $ work( iwork ), lwork-iwork+1, ierr )
3003*
3004* Multiply right bidiagonalizing vectors in A by Q
3005* in VT
3006* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3007*
3008 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3009 $ work( itaup ), vt, ldvt,
3010 $ work( iwork ), lwork-iwork+1, ierr )
3011 iwork = ie + m
3012*
3013* Perform bidiagonal QR iteration, computing right
3014* singular vectors of A in VT
3015* (Workspace: need BDSPAC)
3016*
3017 CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3018 $ ldvt, dum, 1, dum, 1, work( iwork ),
3019 $ info )
3020*
3021 END IF
3022*
3023 ELSE IF( wntuo ) THEN
3024*
3025* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3026* N right singular vectors to be computed in VT and
3027* M left singular vectors to be overwritten on A
3028*
3029 IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3030*
3031* Sufficient workspace for a fast algorithm
3032*
3033 iu = 1
3034 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3035*
3036* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3037*
3038 ldwrku = lda
3039 ir = iu + ldwrku*m
3040 ldwrkr = lda
3041 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3042*
3043* WORK(IU) is LDA by M and WORK(IR) is M by M
3044*
3045 ldwrku = lda
3046 ir = iu + ldwrku*m
3047 ldwrkr = m
3048 ELSE
3049*
3050* WORK(IU) is M by M and WORK(IR) is M by M
3051*
3052 ldwrku = m
3053 ir = iu + ldwrku*m
3054 ldwrkr = m
3055 END IF
3056 itau = ir + ldwrkr*m
3057 iwork = itau + m
3058*
3059* Compute A=L*Q, copying result to VT
3060* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3061*
3062 CALL sgelqf( m, n, a, lda, work( itau ),
3063 $ work( iwork ), lwork-iwork+1, ierr )
3064 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3065*
3066* Generate Q in VT
3067* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3068*
3069 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3070 $ work( iwork ), lwork-iwork+1, ierr )
3071*
3072* Copy L to WORK(IU), zeroing out above it
3073*
3074 CALL slacpy( 'L', m, m, a, lda, work( iu ),
3075 $ ldwrku )
3076 CALL slaset( 'U', m-1, m-1, zero, zero,
3077 $ work( iu+ldwrku ), ldwrku )
3078 ie = itau
3079 itauq = ie + m
3080 itaup = itauq + m
3081 iwork = itaup + m
3082*
3083* Bidiagonalize L in WORK(IU), copying result to
3084* WORK(IR)
3085* (Workspace: need 2*M*M+4*M,
3086* prefer 2*M*M+3*M+2*M*NB)
3087*
3088 CALL sgebrd( m, m, work( iu ), ldwrku, s,
3089 $ work( ie ), work( itauq ),
3090 $ work( itaup ), work( iwork ),
3091 $ lwork-iwork+1, ierr )
3092 CALL slacpy( 'L', m, m, work( iu ), ldwrku,
3093 $ work( ir ), ldwrkr )
3094*
3095* Generate right bidiagonalizing vectors in WORK(IU)
3096* (Workspace: need 2*M*M+4*M-1,
3097* prefer 2*M*M+3*M+(M-1)*NB)
3098*
3099 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3100 $ work( itaup ), work( iwork ),
3101 $ lwork-iwork+1, ierr )
3102*
3103* Generate left bidiagonalizing vectors in WORK(IR)
3104* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3105*
3106 CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3107 $ work( itauq ), work( iwork ),
3108 $ lwork-iwork+1, ierr )
3109 iwork = ie + m
3110*
3111* Perform bidiagonal QR iteration, computing left
3112* singular vectors of L in WORK(IR) and computing
3113* right singular vectors of L in WORK(IU)
3114* (Workspace: need 2*M*M+BDSPAC)
3115*
3116 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3117 $ work( iu ), ldwrku, work( ir ),
3118 $ ldwrkr, dum, 1, work( iwork ), info )
3119*
3120* Multiply right singular vectors of L in WORK(IU) by
3121* Q in VT, storing result in A
3122* (Workspace: need M*M)
3123*
3124 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3125 $ ldwrku, vt, ldvt, zero, a, lda )
3126*
3127* Copy right singular vectors of A from A to VT
3128*
3129 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3130*
3131* Copy left singular vectors of A from WORK(IR) to A
3132*
3133 CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
3134 $ lda )
3135*
3136 ELSE
3137*
3138* Insufficient workspace for a fast algorithm
3139*
3140 itau = 1
3141 iwork = itau + m
3142*
3143* Compute A=L*Q, copying result to VT
3144* (Workspace: need 2*M, prefer M+M*NB)
3145*
3146 CALL sgelqf( m, n, a, lda, work( itau ),
3147 $ work( iwork ), lwork-iwork+1, ierr )
3148 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3149*
3150* Generate Q in VT
3151* (Workspace: need M+N, prefer M+N*NB)
3152*
3153 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3154 $ work( iwork ), lwork-iwork+1, ierr )
3155 ie = itau
3156 itauq = ie + m
3157 itaup = itauq + m
3158 iwork = itaup + m
3159*
3160* Zero out above L in A
3161*
3162 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3163 $ lda )
3164*
3165* Bidiagonalize L in A
3166* (Workspace: need 4*M, prefer 3*M+2*M*NB)
3167*
3168 CALL sgebrd( m, m, a, lda, s, work( ie ),
3169 $ work( itauq ), work( itaup ),
3170 $ work( iwork ), lwork-iwork+1, ierr )
3171*
3172* Multiply right bidiagonalizing vectors in A by Q
3173* in VT
3174* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3175*
3176 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3177 $ work( itaup ), vt, ldvt,
3178 $ work( iwork ), lwork-iwork+1, ierr )
3179*
3180* Generate left bidiagonalizing vectors in A
3181* (Workspace: need 4*M, prefer 3*M+M*NB)
3182*
3183 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3184 $ work( iwork ), lwork-iwork+1, ierr )
3185 iwork = ie + m
3186*
3187* Perform bidiagonal QR iteration, computing left
3188* singular vectors of A in A and computing right
3189* singular vectors of A in VT
3190* (Workspace: need BDSPAC)
3191*
3192 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3193 $ ldvt, a, lda, dum, 1, work( iwork ),
3194 $ info )
3195*
3196 END IF
3197*
3198 ELSE IF( wntuas ) THEN
3199*
3200* Path 9t(N much larger than M, JOBU='S' or 'A',
3201* JOBVT='A')
3202* N right singular vectors to be computed in VT and
3203* M left singular vectors to be computed in U
3204*
3205 IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3206*
3207* Sufficient workspace for a fast algorithm
3208*
3209 iu = 1
3210 IF( lwork.GE.wrkbl+lda*m ) THEN
3211*
3212* WORK(IU) is LDA by M
3213*
3214 ldwrku = lda
3215 ELSE
3216*
3217* WORK(IU) is M by M
3218*
3219 ldwrku = m
3220 END IF
3221 itau = iu + ldwrku*m
3222 iwork = itau + m
3223*
3224* Compute A=L*Q, copying result to VT
3225* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3226*
3227 CALL sgelqf( m, n, a, lda, work( itau ),
3228 $ work( iwork ), lwork-iwork+1, ierr )
3229 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3230*
3231* Generate Q in VT
3232* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3233*
3234 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3235 $ work( iwork ), lwork-iwork+1, ierr )
3236*
3237* Copy L to WORK(IU), zeroing out above it
3238*
3239 CALL slacpy( 'L', m, m, a, lda, work( iu ),
3240 $ ldwrku )
3241 CALL slaset( 'U', m-1, m-1, zero, zero,
3242 $ work( iu+ldwrku ), ldwrku )
3243 ie = itau
3244 itauq = ie + m
3245 itaup = itauq + m
3246 iwork = itaup + m
3247*
3248* Bidiagonalize L in WORK(IU), copying result to U
3249* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3250*
3251 CALL sgebrd( m, m, work( iu ), ldwrku, s,
3252 $ work( ie ), work( itauq ),
3253 $ work( itaup ), work( iwork ),
3254 $ lwork-iwork+1, ierr )
3255 CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
3256 $ ldu )
3257*
3258* Generate right bidiagonalizing vectors in WORK(IU)
3259* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3260*
3261 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3262 $ work( itaup ), work( iwork ),
3263 $ lwork-iwork+1, ierr )
3264*
3265* Generate left bidiagonalizing vectors in U
3266* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3267*
3268 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3269 $ work( iwork ), lwork-iwork+1, ierr )
3270 iwork = ie + m
3271*
3272* Perform bidiagonal QR iteration, computing left
3273* singular vectors of L in U and computing right
3274* singular vectors of L in WORK(IU)
3275* (Workspace: need M*M+BDSPAC)
3276*
3277 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3278 $ work( iu ), ldwrku, u, ldu, dum, 1,
3279 $ work( iwork ), info )
3280*
3281* Multiply right singular vectors of L in WORK(IU) by
3282* Q in VT, storing result in A
3283* (Workspace: need M*M)
3284*
3285 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3286 $ ldwrku, vt, ldvt, zero, a, lda )
3287*
3288* Copy right singular vectors of A from A to VT
3289*
3290 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3291*
3292 ELSE
3293*
3294* Insufficient workspace for a fast algorithm
3295*
3296 itau = 1
3297 iwork = itau + m
3298*
3299* Compute A=L*Q, copying result to VT
3300* (Workspace: need 2*M, prefer M+M*NB)
3301*
3302 CALL sgelqf( m, n, a, lda, work( itau ),
3303 $ work( iwork ), lwork-iwork+1, ierr )
3304 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3305*
3306* Generate Q in VT
3307* (Workspace: need M+N, prefer M+N*NB)
3308*
3309 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3310 $ work( iwork ), lwork-iwork+1, ierr )
3311*
3312* Copy L to U, zeroing out above it
3313*
3314 CALL slacpy( 'L', m, m, a, lda, u, ldu )
3315 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3316 $ ldu )
3317 ie = itau
3318 itauq = ie + m
3319 itaup = itauq + m
3320 iwork = itaup + m
3321*
3322* Bidiagonalize L in U
3323* (Workspace: need 4*M, prefer 3*M+2*M*NB)
3324*
3325 CALL sgebrd( m, m, u, ldu, s, work( ie ),
3326 $ work( itauq ), work( itaup ),
3327 $ work( iwork ), lwork-iwork+1, ierr )
3328*
3329* Multiply right bidiagonalizing vectors in U by Q
3330* in VT
3331* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3332*
3333 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3334 $ work( itaup ), vt, ldvt,
3335 $ work( iwork ), lwork-iwork+1, ierr )
3336*
3337* Generate left bidiagonalizing vectors in U
3338* (Workspace: need 4*M, prefer 3*M+M*NB)
3339*
3340 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3341 $ work( iwork ), lwork-iwork+1, ierr )
3342 iwork = ie + m
3343*
3344* Perform bidiagonal QR iteration, computing left
3345* singular vectors of A in U and computing right
3346* singular vectors of A in VT
3347* (Workspace: need BDSPAC)
3348*
3349 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3350 $ ldvt, u, ldu, dum, 1, work( iwork ),
3351 $ info )
3352*
3353 END IF
3354*
3355 END IF
3356*
3357 END IF
3358*
3359 ELSE
3360*
3361* N .LT. MNTHR
3362*
3363* Path 10t(N greater than M, but not much larger)
3364* Reduce to bidiagonal form without LQ decomposition
3365*
3366 ie = 1
3367 itauq = ie + m
3368 itaup = itauq + m
3369 iwork = itaup + m
3370*
3371* Bidiagonalize A
3372* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3373*
3374 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3375 $ work( itaup ), work( iwork ), lwork-iwork+1,
3376 $ ierr )
3377 IF( wntuas ) THEN
3378*
3379* If left singular vectors desired in U, copy result to U
3380* and generate left bidiagonalizing vectors in U
3381* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3382*
3383 CALL slacpy( 'L', m, m, a, lda, u, ldu )
3384 CALL sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3385 $ work( iwork ), lwork-iwork+1, ierr )
3386 END IF
3387 IF( wntvas ) THEN
3388*
3389* If right singular vectors desired in VT, copy result to
3390* VT and generate right bidiagonalizing vectors in VT
3391* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3392*
3393 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3394 IF( wntva )
3395 $ nrvt = n
3396 IF( wntvs )
3397 $ nrvt = m
3398 CALL sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3399 $ work( iwork ), lwork-iwork+1, ierr )
3400 END IF
3401 IF( wntuo ) THEN
3402*
3403* If left singular vectors desired in A, generate left
3404* bidiagonalizing vectors in A
3405* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3406*
3407 CALL sorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3408 $ work( iwork ), lwork-iwork+1, ierr )
3409 END IF
3410 IF( wntvo ) THEN
3411*
3412* If right singular vectors desired in A, generate right
3413* bidiagonalizing vectors in A
3414* (Workspace: need 4*M, prefer 3*M+M*NB)
3415*
3416 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
3417 $ work( iwork ), lwork-iwork+1, ierr )
3418 END IF
3419 iwork = ie + m
3420 IF( wntuas .OR. wntuo )
3421 $ nru = m
3422 IF( wntun )
3423 $ nru = 0
3424 IF( wntvas .OR. wntvo )
3425 $ ncvt = n
3426 IF( wntvn )
3427 $ ncvt = 0
3428 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3429*
3430* Perform bidiagonal QR iteration, if desired, computing
3431* left singular vectors in U and computing right singular
3432* vectors in VT
3433* (Workspace: need BDSPAC)
3434*
3435 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3436 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3437 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3438*
3439* Perform bidiagonal QR iteration, if desired, computing
3440* left singular vectors in U and computing right singular
3441* vectors in A
3442* (Workspace: need BDSPAC)
3443*
3444 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3445 $ u, ldu, dum, 1, work( iwork ), info )
3446 ELSE
3447*
3448* Perform bidiagonal QR iteration, if desired, computing
3449* left singular vectors in A and computing right singular
3450* vectors in VT
3451* (Workspace: need BDSPAC)
3452*
3453 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3454 $ ldvt, a, lda, dum, 1, work( iwork ), info )
3455 END IF
3456*
3457 END IF
3458*
3459 END IF
3460*
3461* If SBDSQR failed to converge, copy unconverged superdiagonals
3462* to WORK( 2:MINMN )
3463*
3464 IF( info.NE.0 ) THEN
3465 IF( ie.GT.2 ) THEN
3466 DO 50 i = 1, minmn - 1
3467 work( i+1 ) = work( i+ie-1 )
3468 50 CONTINUE
3469 END IF
3470 IF( ie.LT.2 ) THEN
3471 DO 60 i = minmn - 1, 1, -1
3472 work( i+1 ) = work( i+ie-1 )
3473 60 CONTINUE
3474 END IF
3475 END IF
3476*
3477* Undo scaling if necessary
3478*
3479 IF( iscl.EQ.1 ) THEN
3480 IF( anrm.GT.bignum )
3481 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3482 $ ierr )
3483 IF( info.NE.0 .AND. anrm.GT.bignum )
3484 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3485 $ minmn, ierr )
3486 IF( anrm.LT.smlnum )
3487 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3488 $ ierr )
3489 IF( info.NE.0 .AND. anrm.LT.smlnum )
3490 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3491 $ minmn, ierr )
3492 END IF
3493*
3494* Return optimal workspace in WORK(1)
3495*
3496 work( 1 ) = maxwrk
3497*
3498 RETURN
3499*
3500* End of SGESVD
3501*
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine sbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, work, info)
SBDSQR
Definition sbdsqr.f:240

◆ sgesvdq()

subroutine sgesvdq ( character joba,
character jobp,
character jobr,
character jobu,
character jobv,
integer m,
integer n,
real, dimension( lda, * ) a,
integer lda,
real, dimension( * ) s,
real, dimension( ldu, * ) u,
integer ldu,
real, dimension( ldv, * ) v,
integer ldv,
integer numrank,
integer, dimension( * ) iwork,
integer liwork,
real, dimension( * ) work,
integer lwork,
real, dimension( * ) rwork,
integer lrwork,
integer info )

SGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method for GE matrices

Download SGESVDQ + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> SGESVDQ computes the singular value decomposition (SVD) of a real
!> M-by-N matrix A, where M >= N. The SVD of A is written as
!>                                    [++]   [xx]   [x0]   [xx]
!>              A = U * SIGMA * V^*,  [++] = [xx] * [ox] * [xx]
!>                                    [++]   [xx]
!> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal
!> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements
!> of SIGMA are the singular values of A. The columns of U and V are the
!> left and the right singular vectors of A, respectively.
!> 
Parameters
[in]JOBA
!>  JOBA is CHARACTER*1
!>  Specifies the level of accuracy in the computed SVD
!>  = 'A' The requested accuracy corresponds to having the backward
!>        error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F,
!>        where EPS = SLAMCH('Epsilon'). This authorises CGESVDQ to
!>        truncate the computed triangular factor in a rank revealing
!>        QR factorization whenever the truncated part is below the
!>        threshold of the order of EPS * ||A||_F. This is aggressive
!>        truncation level.
!>  = 'M' Similarly as with 'A', but the truncation is more gentle: it
!>        is allowed only when there is a drop on the diagonal of the
!>        triangular factor in the QR factorization. This is medium
!>        truncation level.
!>  = 'H' High accuracy requested. No numerical rank determination based
!>        on the rank revealing QR factorization is attempted.
!>  = 'E' Same as 'H', and in addition the condition number of column
!>        scaled A is estimated and returned in  RWORK(1).
!>        N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1)
!> 
[in]JOBP
!>  JOBP is CHARACTER*1
!>  = 'P' The rows of A are ordered in decreasing order with respect to
!>        ||A(i,:)||_\infty. This enhances numerical accuracy at the cost
!>        of extra data movement. Recommended for numerical robustness.
!>  = 'N' No row pivoting.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>          = 'T' After the initial pivoted QR factorization, SGESVD is applied to
!>          the transposed R**T of the computed triangular factor R. This involves
!>          some extra data movement (matrix transpositions). Useful for
!>          experiments, research and development.
!>          = 'N' The triangular factor R is given as input to SGESVD. This may be
!>          preferred as it involves less data movement.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>          = 'A' All M left singular vectors are computed and returned in the
!>          matrix U. See the description of U.
!>          = 'S' or 'U' N = min(M,N) left singular vectors are computed and returned
!>          in the matrix U. See the description of U.
!>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular
!>          vectors are computed and returned in the matrix U.
!>          = 'F' The N left singular vectors are returned in factored form as the
!>          product of the Q factor from the initial QR factorization and the
!>          N left singular vectors of (R**T , 0)**T. If row pivoting is used,
!>          then the necessary information on the row pivoting is stored in
!>          IWORK(N+1:N+M-1).
!>          = 'N' The left singular vectors are not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>          = 'A', 'V' All N right singular vectors are computed and returned in
!>          the matrix V.
!>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular
!>          vectors are computed and returned in the matrix V. This option is
!>          allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal.
!>          = 'N' The right singular vectors are not computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  M >= N >= 0.
!> 
[in,out]A
!>          A is REAL array of dimensions LDA x N
!>          On entry, the input matrix A.
!>          On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains
!>          the Householder vectors as stored by SGEQP3. If JOBU = 'F', these Householder
!>          vectors together with WORK(1:N) can be used to restore the Q factors from
!>          the initial pivoted QR factorization of A. See the description of U.
!> 
[in]LDA
!>          LDA is INTEGER.
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is REAL array of dimension N.
!>          The singular values of A, ordered so that S(i) >= S(i+1).
!> 
[out]U
!>          U is REAL array, dimension
!>          LDU x M if JOBU = 'A'; see the description of LDU. In this case,
!>          on exit, U contains the M left singular vectors.
!>          LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this
!>          case, U contains the leading N or the leading NUMRANK left singular vectors.
!>          LDU x N if JOBU = 'F' ; see the description of LDU. In this case U
!>          contains N x N orthogonal matrix that can be used to form the left
!>          singular vectors.
!>          If JOBU = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER.
!>          The leading dimension of the array U.
!>          If JOBU = 'A', 'S', 'U', 'R',  LDU >= max(1,M).
!>          If JOBU = 'F',                 LDU >= max(1,N).
!>          Otherwise,                     LDU >= 1.
!> 
[out]V
!>          V is REAL array, dimension
!>          LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' .
!>          If JOBV = 'A', or 'V',  V contains the N-by-N orthogonal matrix  V**T;
!>          If JOBV = 'R', V contains the first NUMRANK rows of V**T (the right
!>          singular vectors, stored rowwise, of the NUMRANK largest singular values).
!>          If JOBV = 'N' and JOBA = 'E', V is used as a workspace.
!>          If JOBV = 'N', and JOBA.NE.'E', V is not referenced.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V.
!>          If JOBV = 'A', 'V', 'R',  or JOBA = 'E', LDV >= max(1,N).
!>          Otherwise,                               LDV >= 1.
!> 
[out]NUMRANK
!>          NUMRANK is INTEGER
!>          NUMRANK is the numerical rank first determined after the rank
!>          revealing QR factorization, following the strategy specified by the
!>          value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK
!>          leading singular values and vectors are then requested in the call
!>          of SGESVD. The final value of NUMRANK might be further reduced if
!>          some singular values are computed as zeros.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (max(1, LIWORK)).
!>          On exit, IWORK(1:N) contains column pivoting permutation of the
!>          rank revealing QR factorization.
!>          If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence
!>          of row swaps used in row pivoting. These can be used to restore the
!>          left singular vectors in the case JOBU = 'F'.
!>
!>          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          IWORK(1) returns the minimal LIWORK.
!> 
[in]LIWORK
!>          LIWORK is INTEGER
!>          The dimension of the array IWORK.
!>          LIWORK >= N + M - 1,     if JOBP = 'P' and JOBA .NE. 'E';
!>          LIWORK >= N              if JOBP = 'N' and JOBA .NE. 'E';
!>          LIWORK >= N + M - 1 + N, if JOBP = 'P' and JOBA = 'E';
!>          LIWORK >= N + N          if JOBP = 'N' and JOBA = 'E'.
!>
!>          If LIWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the WORK, IWORK, and RWORK arrays, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]WORK
!>          WORK is REAL array, dimension (max(2, LWORK)), used as a workspace.
!>          On exit, if, on entry, LWORK.NE.-1, WORK(1:N) contains parameters
!>          needed to recover the Q factor from the QR factorization computed by
!>          SGEQP3.
!>
!>          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          WORK(1) returns the optimal LWORK, and
!>          WORK(2) returns the minimal LWORK.
!> 
[in,out]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK. It is determined as follows:
!>          Let  LWQP3 = 3*N+1,  LWCON = 3*N, and let
!>          LWORQ = { MAX( N, 1 ),  if JOBU = 'R', 'S', or 'U'
!>                  { MAX( M, 1 ),  if JOBU = 'A'
!>          LWSVD = MAX( 5*N, 1 )
!>          LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 5*(N/2), 1 ), LWORLQ = MAX( N, 1 ),
!>          LWQRF = MAX( N/2, 1 ), LWORQ2 = MAX( N, 1 )
!>          Then the minimal value of LWORK is:
!>          = MAX( N + LWQP3, LWSVD )        if only the singular values are needed;
!>          = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed,
!>                                   and a scaled condition estimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD, LWORQ ) if the singular values and the left
!>                                   singular vectors are requested;
!>          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the singular values and the left
!>                                   singular vectors are requested, and also
!>                                   a scaled condition estimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD )        if the singular values and the right
!>                                   singular vectors are requested;
!>          = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right
!>                                   singular vectors are requested, and also
!>                                   a scaled condition etimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD, LWORQ ) if the full SVD is requested with JOBV = 'R';
!>                                   independent of JOBR;
!>          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the full SVD is requested,
!>                                   JOBV = 'R' and, also a scaled condition
!>                                   estimate requested; independent of JOBR;
!>          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ),
!>         N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ) ) if the
!>                         full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='N'
!>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ),
!>         N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ ) )
!>                         if the full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='N', and also a scaled condition number estimate
!>                         requested.
!>          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ),
!>         N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) if the
!>                         full SVD is requested with JOBV = 'A', 'V', and JOBR ='T'
!>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ),
!>         N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) )
!>                         if the full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='T', and also a scaled condition number estimate
!>                         requested.
!>          Finally, LWORK must be at least two: LWORK = MAX( 2, LWORK ).
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the WORK, IWORK, and RWORK arrays, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is REAL array, dimension (max(1, LRWORK)).
!>          On exit,
!>          1. If JOBA = 'E', RWORK(1) contains an estimate of the condition
!>          number of column scaled A. If A = C * D where D is diagonal and C
!>          has unit columns in the Euclidean norm, then, assuming full column rank,
!>          N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1).
!>          Otherwise, RWORK(1) = -1.
!>          2. RWORK(2) contains the number of singular values computed as
!>          exact zeros in SGESVD applied to the upper triangular or trapezoidal
!>          R (from the initial QR factorization). In case of early exit (no call to
!>          SGESVD, such as in the case of zero matrix) RWORK(2) = -1.
!>
!>          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          RWORK(1) returns the minimal LRWORK.
!> 
[in]LRWORK
!>          LRWORK is INTEGER.
!>          The dimension of the array RWORK.
!>          If JOBP ='P', then LRWORK >= MAX(2, M).
!>          Otherwise, LRWORK >= 2
!>
!>          If LRWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the WORK, IWORK, and RWORK arrays, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if SBDSQR did not converge, INFO specifies how many superdiagonals
!>          of an intermediate bidiagonal form B (computed in SGESVD) did not
!>          converge to zero.
!> 
Further Details:
!>
!>   1. The data movement (matrix transpose) is coded using simple nested
!>   DO-loops because BLAS and LAPACK do not provide corresponding subroutines.
!>   Those DO-loops are easily identified in this source code - by the CONTINUE
!>   statements labeled with 11**. In an optimized version of this code, the
!>   nested DO loops should be replaced with calls to an optimized subroutine.
!>   2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause
!>   column norm overflow. This is the minial precaution and it is left to the
!>   SVD routine (CGESVD) to do its own preemptive scaling if potential over-
!>   or underflows are detected. To avoid repeated scanning of the array A,
!>   an optimal implementation would do all necessary scaling before calling
!>   CGESVD and the scaling in CGESVD can be switched off.
!>   3. Other comments related to code optimization are given in comments in the
!>   code, enlosed in [[double brackets]].
!> 
Bugs, examples and comments
!>  Please report all bugs and send interesting examples and/or comments to
!>  drmac@math.hr. Thank you.
!> 
References
!>  [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for
!>      Computing the SVD with High Accuracy. ACM Trans. Math. Softw.
!>      44(1): 11:1-11:30 (2017)
!>
!>  SIGMA library, xGESVDQ section updated February 2016.
!>  Developed and coded by Zlatko Drmac, Department of Mathematics
!>  University of Zagreb, Croatia, drmac@math.hr
!> 
Contributors:
!> Developed and coded by Zlatko Drmac, Department of Mathematics
!>  University of Zagreb, Croatia, drmac@math.hr
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 412 of file sgesvdq.f.

415* .. Scalar Arguments ..
416 IMPLICIT NONE
417 CHARACTER JOBA, JOBP, JOBR, JOBU, JOBV
418 INTEGER M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LWORK, LRWORK,
419 $ INFO
420* ..
421* .. Array Arguments ..
422 REAL A( LDA, * ), U( LDU, * ), V( LDV, * ), WORK( * )
423 REAL S( * ), RWORK( * )
424 INTEGER IWORK( * )
425*
426* =====================================================================
427*
428* .. Parameters ..
429 REAL ZERO, ONE
430 parameter( zero = 0.0e0, one = 1.0e0 )
431* ..
432* .. Local Scalars ..
433 INTEGER IERR, IWOFF, NR, N1, OPTRATIO, p, q
434 INTEGER LWCON, LWQP3, LWRK_SGELQF, LWRK_SGESVD, LWRK_SGESVD2,
435 $ LWRK_SGEQP3, LWRK_SGEQRF, LWRK_SORMLQ, LWRK_SORMQR,
436 $ LWRK_SORMQR2, LWLQF, LWQRF, LWSVD, LWSVD2, LWORQ,
437 $ LWORQ2, LWUNLQ, MINWRK, MINWRK2, OPTWRK, OPTWRK2,
438 $ IMINWRK, RMINWRK
439 LOGICAL ACCLA, ACCLM, ACCLH, ASCALED, CONDA, DNTWU, DNTWV,
440 $ LQUERY, LSVC0, LSVEC, ROWPRM, RSVEC, RTRANS, WNTUA,
441 $ WNTUF, WNTUR, WNTUS, WNTVA, WNTVR
442 REAL BIG, EPSLN, RTMP, SCONDA, SFMIN
443* ..
444* .. Local Arrays
445 REAL RDUMMY(1)
446* ..
447* .. External Subroutines (BLAS, LAPACK)
448 EXTERNAL sgelqf, sgeqp3, sgeqrf, sgesvd, slacpy, slapmt,
450 $ sormqr, xerbla
451* ..
452* .. External Functions (BLAS, LAPACK)
453 LOGICAL LSAME
454 INTEGER ISAMAX
455 REAL SLANGE, SNRM2, SLAMCH
456 EXTERNAL slange, lsame, isamax, snrm2, slamch
457* ..
458* .. Intrinsic Functions ..
459 INTRINSIC abs, max, min, real, sqrt
460* ..
461* .. Executable Statements ..
462*
463* Test the input arguments
464*
465 wntus = lsame( jobu, 'S' ) .OR. lsame( jobu, 'U' )
466 wntur = lsame( jobu, 'R' )
467 wntua = lsame( jobu, 'A' )
468 wntuf = lsame( jobu, 'F' )
469 lsvc0 = wntus .OR. wntur .OR. wntua
470 lsvec = lsvc0 .OR. wntuf
471 dntwu = lsame( jobu, 'N' )
472*
473 wntvr = lsame( jobv, 'R' )
474 wntva = lsame( jobv, 'A' ) .OR. lsame( jobv, 'V' )
475 rsvec = wntvr .OR. wntva
476 dntwv = lsame( jobv, 'N' )
477*
478 accla = lsame( joba, 'A' )
479 acclm = lsame( joba, 'M' )
480 conda = lsame( joba, 'E' )
481 acclh = lsame( joba, 'H' ) .OR. conda
482*
483 rowprm = lsame( jobp, 'P' )
484 rtrans = lsame( jobr, 'T' )
485*
486 IF ( rowprm ) THEN
487 IF ( conda ) THEN
488 iminwrk = max( 1, n + m - 1 + n )
489 ELSE
490 iminwrk = max( 1, n + m - 1 )
491 END IF
492 rminwrk = max( 2, m )
493 ELSE
494 IF ( conda ) THEN
495 iminwrk = max( 1, n + n )
496 ELSE
497 iminwrk = max( 1, n )
498 END IF
499 rminwrk = 2
500 END IF
501 lquery = (liwork .EQ. -1 .OR. lwork .EQ. -1 .OR. lrwork .EQ. -1)
502 info = 0
503 IF ( .NOT. ( accla .OR. acclm .OR. acclh ) ) THEN
504 info = -1
505 ELSE IF ( .NOT.( rowprm .OR. lsame( jobp, 'N' ) ) ) THEN
506 info = -2
507 ELSE IF ( .NOT.( rtrans .OR. lsame( jobr, 'N' ) ) ) THEN
508 info = -3
509 ELSE IF ( .NOT.( lsvec .OR. dntwu ) ) THEN
510 info = -4
511 ELSE IF ( wntur .AND. wntva ) THEN
512 info = -5
513 ELSE IF ( .NOT.( rsvec .OR. dntwv )) THEN
514 info = -5
515 ELSE IF ( m.LT.0 ) THEN
516 info = -6
517 ELSE IF ( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
518 info = -7
519 ELSE IF ( lda.LT.max( 1, m ) ) THEN
520 info = -9
521 ELSE IF ( ldu.LT.1 .OR. ( lsvc0 .AND. ldu.LT.m ) .OR.
522 $ ( wntuf .AND. ldu.LT.n ) ) THEN
523 info = -12
524 ELSE IF ( ldv.LT.1 .OR. ( rsvec .AND. ldv.LT.n ) .OR.
525 $ ( conda .AND. ldv.LT.n ) ) THEN
526 info = -14
527 ELSE IF ( liwork .LT. iminwrk .AND. .NOT. lquery ) THEN
528 info = -17
529 END IF
530*
531*
532 IF ( info .EQ. 0 ) THEN
533* .. compute the minimal and the optimal workspace lengths
534* [[The expressions for computing the minimal and the optimal
535* values of LWORK are written with a lot of redundancy and
536* can be simplified. However, this detailed form is easier for
537* maintenance and modifications of the code.]]
538*
539* .. minimal workspace length for SGEQP3 of an M x N matrix
540 lwqp3 = 3 * n + 1
541* .. minimal workspace length for SORMQR to build left singular vectors
542 IF ( wntus .OR. wntur ) THEN
543 lworq = max( n , 1 )
544 ELSE IF ( wntua ) THEN
545 lworq = max( m , 1 )
546 END IF
547* .. minimal workspace length for SPOCON of an N x N matrix
548 lwcon = 3 * n
549* .. SGESVD of an N x N matrix
550 lwsvd = max( 5 * n, 1 )
551 IF ( lquery ) THEN
552 CALL sgeqp3( m, n, a, lda, iwork, rdummy, rdummy, -1,
553 $ ierr )
554 lwrk_sgeqp3 = int( rdummy(1) )
555 IF ( wntus .OR. wntur ) THEN
556 CALL sormqr( 'L', 'N', m, n, n, a, lda, rdummy, u,
557 $ ldu, rdummy, -1, ierr )
558 lwrk_sormqr = int( rdummy(1) )
559 ELSE IF ( wntua ) THEN
560 CALL sormqr( 'L', 'N', m, m, n, a, lda, rdummy, u,
561 $ ldu, rdummy, -1, ierr )
562 lwrk_sormqr = int( rdummy(1) )
563 ELSE
564 lwrk_sormqr = 0
565 END IF
566 END IF
567 minwrk = 2
568 optwrk = 2
569 IF ( .NOT. (lsvec .OR. rsvec )) THEN
570* .. minimal and optimal sizes of the workspace if
571* only the singular values are requested
572 IF ( conda ) THEN
573 minwrk = max( n+lwqp3, lwcon, lwsvd )
574 ELSE
575 minwrk = max( n+lwqp3, lwsvd )
576 END IF
577 IF ( lquery ) THEN
578 CALL sgesvd( 'N', 'N', n, n, a, lda, s, u, ldu,
579 $ v, ldv, rdummy, -1, ierr )
580 lwrk_sgesvd = int( rdummy(1) )
581 IF ( conda ) THEN
582 optwrk = max( n+lwrk_sgeqp3, n+lwcon, lwrk_sgesvd )
583 ELSE
584 optwrk = max( n+lwrk_sgeqp3, lwrk_sgesvd )
585 END IF
586 END IF
587 ELSE IF ( lsvec .AND. (.NOT.rsvec) ) THEN
588* .. minimal and optimal sizes of the workspace if the
589* singular values and the left singular vectors are requested
590 IF ( conda ) THEN
591 minwrk = n + max( lwqp3, lwcon, lwsvd, lworq )
592 ELSE
593 minwrk = n + max( lwqp3, lwsvd, lworq )
594 END IF
595 IF ( lquery ) THEN
596 IF ( rtrans ) THEN
597 CALL sgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,
598 $ v, ldv, rdummy, -1, ierr )
599 ELSE
600 CALL sgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,
601 $ v, ldv, rdummy, -1, ierr )
602 END IF
603 lwrk_sgesvd = int( rdummy(1) )
604 IF ( conda ) THEN
605 optwrk = n + max( lwrk_sgeqp3, lwcon, lwrk_sgesvd,
606 $ lwrk_sormqr )
607 ELSE
608 optwrk = n + max( lwrk_sgeqp3, lwrk_sgesvd,
609 $ lwrk_sormqr )
610 END IF
611 END IF
612 ELSE IF ( rsvec .AND. (.NOT.lsvec) ) THEN
613* .. minimal and optimal sizes of the workspace if the
614* singular values and the right singular vectors are requested
615 IF ( conda ) THEN
616 minwrk = n + max( lwqp3, lwcon, lwsvd )
617 ELSE
618 minwrk = n + max( lwqp3, lwsvd )
619 END IF
620 IF ( lquery ) THEN
621 IF ( rtrans ) THEN
622 CALL sgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,
623 $ v, ldv, rdummy, -1, ierr )
624 ELSE
625 CALL sgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,
626 $ v, ldv, rdummy, -1, ierr )
627 END IF
628 lwrk_sgesvd = int( rdummy(1) )
629 IF ( conda ) THEN
630 optwrk = n + max( lwrk_sgeqp3, lwcon, lwrk_sgesvd )
631 ELSE
632 optwrk = n + max( lwrk_sgeqp3, lwrk_sgesvd )
633 END IF
634 END IF
635 ELSE
636* .. minimal and optimal sizes of the workspace if the
637* full SVD is requested
638 IF ( rtrans ) THEN
639 minwrk = max( lwqp3, lwsvd, lworq )
640 IF ( conda ) minwrk = max( minwrk, lwcon )
641 minwrk = minwrk + n
642 IF ( wntva ) THEN
643* .. minimal workspace length for N x N/2 SGEQRF
644 lwqrf = max( n/2, 1 )
645* .. minimal workspace length for N/2 x N/2 SGESVD
646 lwsvd2 = max( 5 * (n/2), 1 )
647 lworq2 = max( n, 1 )
648 minwrk2 = max( lwqp3, n/2+lwqrf, n/2+lwsvd2,
649 $ n/2+lworq2, lworq )
650 IF ( conda ) minwrk2 = max( minwrk2, lwcon )
651 minwrk2 = n + minwrk2
652 minwrk = max( minwrk, minwrk2 )
653 END IF
654 ELSE
655 minwrk = max( lwqp3, lwsvd, lworq )
656 IF ( conda ) minwrk = max( minwrk, lwcon )
657 minwrk = minwrk + n
658 IF ( wntva ) THEN
659* .. minimal workspace length for N/2 x N SGELQF
660 lwlqf = max( n/2, 1 )
661 lwsvd2 = max( 5 * (n/2), 1 )
662 lwunlq = max( n , 1 )
663 minwrk2 = max( lwqp3, n/2+lwlqf, n/2+lwsvd2,
664 $ n/2+lwunlq, lworq )
665 IF ( conda ) minwrk2 = max( minwrk2, lwcon )
666 minwrk2 = n + minwrk2
667 minwrk = max( minwrk, minwrk2 )
668 END IF
669 END IF
670 IF ( lquery ) THEN
671 IF ( rtrans ) THEN
672 CALL sgesvd( 'O', 'A', n, n, a, lda, s, u, ldu,
673 $ v, ldv, rdummy, -1, ierr )
674 lwrk_sgesvd = int( rdummy(1) )
675 optwrk = max(lwrk_sgeqp3,lwrk_sgesvd,lwrk_sormqr)
676 IF ( conda ) optwrk = max( optwrk, lwcon )
677 optwrk = n + optwrk
678 IF ( wntva ) THEN
679 CALL sgeqrf(n,n/2,u,ldu,rdummy,rdummy,-1,ierr)
680 lwrk_sgeqrf = int( rdummy(1) )
681 CALL sgesvd( 'S', 'O', n/2,n/2, v,ldv, s, u,ldu,
682 $ v, ldv, rdummy, -1, ierr )
683 lwrk_sgesvd2 = int( rdummy(1) )
684 CALL sormqr( 'R', 'C', n, n, n/2, u, ldu, rdummy,
685 $ v, ldv, rdummy, -1, ierr )
686 lwrk_sormqr2 = int( rdummy(1) )
687 optwrk2 = max( lwrk_sgeqp3, n/2+lwrk_sgeqrf,
688 $ n/2+lwrk_sgesvd2, n/2+lwrk_sormqr2 )
689 IF ( conda ) optwrk2 = max( optwrk2, lwcon )
690 optwrk2 = n + optwrk2
691 optwrk = max( optwrk, optwrk2 )
692 END IF
693 ELSE
694 CALL sgesvd( 'S', 'O', n, n, a, lda, s, u, ldu,
695 $ v, ldv, rdummy, -1, ierr )
696 lwrk_sgesvd = int( rdummy(1) )
697 optwrk = max(lwrk_sgeqp3,lwrk_sgesvd,lwrk_sormqr)
698 IF ( conda ) optwrk = max( optwrk, lwcon )
699 optwrk = n + optwrk
700 IF ( wntva ) THEN
701 CALL sgelqf(n/2,n,u,ldu,rdummy,rdummy,-1,ierr)
702 lwrk_sgelqf = int( rdummy(1) )
703 CALL sgesvd( 'S','O', n/2,n/2, v, ldv, s, u, ldu,
704 $ v, ldv, rdummy, -1, ierr )
705 lwrk_sgesvd2 = int( rdummy(1) )
706 CALL sormlq( 'R', 'N', n, n, n/2, u, ldu, rdummy,
707 $ v, ldv, rdummy,-1,ierr )
708 lwrk_sormlq = int( rdummy(1) )
709 optwrk2 = max( lwrk_sgeqp3, n/2+lwrk_sgelqf,
710 $ n/2+lwrk_sgesvd2, n/2+lwrk_sormlq )
711 IF ( conda ) optwrk2 = max( optwrk2, lwcon )
712 optwrk2 = n + optwrk2
713 optwrk = max( optwrk, optwrk2 )
714 END IF
715 END IF
716 END IF
717 END IF
718*
719 minwrk = max( 2, minwrk )
720 optwrk = max( 2, optwrk )
721 IF ( lwork .LT. minwrk .AND. (.NOT.lquery) ) info = -19
722*
723 END IF
724*
725 IF (info .EQ. 0 .AND. lrwork .LT. rminwrk .AND. .NOT. lquery) THEN
726 info = -21
727 END IF
728 IF( info.NE.0 ) THEN
729 CALL xerbla( 'SGESVDQ', -info )
730 RETURN
731 ELSE IF ( lquery ) THEN
732*
733* Return optimal workspace
734*
735 iwork(1) = iminwrk
736 work(1) = optwrk
737 work(2) = minwrk
738 rwork(1) = rminwrk
739 RETURN
740 END IF
741*
742* Quick return if the matrix is void.
743*
744 IF( ( m.EQ.0 ) .OR. ( n.EQ.0 ) ) THEN
745* .. all output is void.
746 RETURN
747 END IF
748*
749 big = slamch('O')
750 ascaled = .false.
751 iwoff = 1
752 IF ( rowprm ) THEN
753 iwoff = m
754* .. reordering the rows in decreasing sequence in the
755* ell-infinity norm - this enhances numerical robustness in
756* the case of differently scaled rows.
757 DO 1904 p = 1, m
758* RWORK(p) = ABS( A(p,ICAMAX(N,A(p,1),LDA)) )
759* [[SLANGE will return NaN if an entry of the p-th row is Nan]]
760 rwork(p) = slange( 'M', 1, n, a(p,1), lda, rdummy )
761* .. check for NaN's and Inf's
762 IF ( ( rwork(p) .NE. rwork(p) ) .OR.
763 $ ( (rwork(p)*zero) .NE. zero ) ) THEN
764 info = -8
765 CALL xerbla( 'SGESVDQ', -info )
766 RETURN
767 END IF
768 1904 CONTINUE
769 DO 1952 p = 1, m - 1
770 q = isamax( m-p+1, rwork(p), 1 ) + p - 1
771 iwork(n+p) = q
772 IF ( p .NE. q ) THEN
773 rtmp = rwork(p)
774 rwork(p) = rwork(q)
775 rwork(q) = rtmp
776 END IF
777 1952 CONTINUE
778*
779 IF ( rwork(1) .EQ. zero ) THEN
780* Quick return: A is the M x N zero matrix.
781 numrank = 0
782 CALL slaset( 'G', n, 1, zero, zero, s, n )
783 IF ( wntus ) CALL slaset('G', m, n, zero, one, u, ldu)
784 IF ( wntua ) CALL slaset('G', m, m, zero, one, u, ldu)
785 IF ( wntva ) CALL slaset('G', n, n, zero, one, v, ldv)
786 IF ( wntuf ) THEN
787 CALL slaset( 'G', n, 1, zero, zero, work, n )
788 CALL slaset( 'G', m, n, zero, one, u, ldu )
789 END IF
790 DO 5001 p = 1, n
791 iwork(p) = p
792 5001 CONTINUE
793 IF ( rowprm ) THEN
794 DO 5002 p = n + 1, n + m - 1
795 iwork(p) = p - n
796 5002 CONTINUE
797 END IF
798 IF ( conda ) rwork(1) = -1
799 rwork(2) = -1
800 RETURN
801 END IF
802*
803 IF ( rwork(1) .GT. big / sqrt(real(m)) ) THEN
804* .. to prevent overflow in the QR factorization, scale the
805* matrix by 1/sqrt(M) if too large entry detected
806 CALL slascl('G',0,0,sqrt(real(m)),one, m,n, a,lda, ierr)
807 ascaled = .true.
808 END IF
809 CALL slaswp( n, a, lda, 1, m-1, iwork(n+1), 1 )
810 END IF
811*
812* .. At this stage, preemptive scaling is done only to avoid column
813* norms overflows during the QR factorization. The SVD procedure should
814* have its own scaling to save the singular values from overflows and
815* underflows. That depends on the SVD procedure.
816*
817 IF ( .NOT.rowprm ) THEN
818 rtmp = slange( 'M', m, n, a, lda, rdummy )
819 IF ( ( rtmp .NE. rtmp ) .OR.
820 $ ( (rtmp*zero) .NE. zero ) ) THEN
821 info = -8
822 CALL xerbla( 'SGESVDQ', -info )
823 RETURN
824 END IF
825 IF ( rtmp .GT. big / sqrt(real(m)) ) THEN
826* .. to prevent overflow in the QR factorization, scale the
827* matrix by 1/sqrt(M) if too large entry detected
828 CALL slascl('G',0,0, sqrt(real(m)),one, m,n, a,lda, ierr)
829 ascaled = .true.
830 END IF
831 END IF
832*
833* .. QR factorization with column pivoting
834*
835* A * P = Q * [ R ]
836* [ 0 ]
837*
838 DO 1963 p = 1, n
839* .. all columns are free columns
840 iwork(p) = 0
841 1963 CONTINUE
842 CALL sgeqp3( m, n, a, lda, iwork, work, work(n+1), lwork-n,
843 $ ierr )
844*
845* If the user requested accuracy level allows truncation in the
846* computed upper triangular factor, the matrix R is examined and,
847* if possible, replaced with its leading upper trapezoidal part.
848*
849 epsln = slamch('E')
850 sfmin = slamch('S')
851* SMALL = SFMIN / EPSLN
852 nr = n
853*
854 IF ( accla ) THEN
855*
856* Standard absolute error bound suffices. All sigma_i with
857* sigma_i < N*EPS*||A||_F are flushed to zero. This is an
858* aggressive enforcement of lower numerical rank by introducing a
859* backward error of the order of N*EPS*||A||_F.
860 nr = 1
861 rtmp = sqrt(real(n))*epsln
862 DO 3001 p = 2, n
863 IF ( abs(a(p,p)) .LT. (rtmp*abs(a(1,1))) ) GO TO 3002
864 nr = nr + 1
865 3001 CONTINUE
866 3002 CONTINUE
867*
868 ELSEIF ( acclm ) THEN
869* .. similarly as above, only slightly more gentle (less aggressive).
870* Sudden drop on the diagonal of R is used as the criterion for being
871* close-to-rank-deficient. The threshold is set to EPSLN=SLAMCH('E').
872* [[This can be made more flexible by replacing this hard-coded value
873* with a user specified threshold.]] Also, the values that underflow
874* will be truncated.
875 nr = 1
876 DO 3401 p = 2, n
877 IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
878 $ ( abs(a(p,p)) .LT. sfmin ) ) GO TO 3402
879 nr = nr + 1
880 3401 CONTINUE
881 3402 CONTINUE
882*
883 ELSE
884* .. RRQR not authorized to determine numerical rank except in the
885* obvious case of zero pivots.
886* .. inspect R for exact zeros on the diagonal;
887* R(i,i)=0 => R(i:N,i:N)=0.
888 nr = 1
889 DO 3501 p = 2, n
890 IF ( abs(a(p,p)) .EQ. zero ) GO TO 3502
891 nr = nr + 1
892 3501 CONTINUE
893 3502 CONTINUE
894*
895 IF ( conda ) THEN
896* Estimate the scaled condition number of A. Use the fact that it is
897* the same as the scaled condition number of R.
898* .. V is used as workspace
899 CALL slacpy( 'U', n, n, a, lda, v, ldv )
900* Only the leading NR x NR submatrix of the triangular factor
901* is considered. Only if NR=N will this give a reliable error
902* bound. However, even for NR < N, this can be used on an
903* expert level and obtain useful information in the sense of
904* perturbation theory.
905 DO 3053 p = 1, nr
906 rtmp = snrm2( p, v(1,p), 1 )
907 CALL sscal( p, one/rtmp, v(1,p), 1 )
908 3053 CONTINUE
909 IF ( .NOT. ( lsvec .OR. rsvec ) ) THEN
910 CALL spocon( 'U', nr, v, ldv, one, rtmp,
911 $ work, iwork(n+iwoff), ierr )
912 ELSE
913 CALL spocon( 'U', nr, v, ldv, one, rtmp,
914 $ work(n+1), iwork(n+iwoff), ierr )
915 END IF
916 sconda = one / sqrt(rtmp)
917* For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1),
918* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
919* See the reference [1] for more details.
920 END IF
921*
922 ENDIF
923*
924 IF ( wntur ) THEN
925 n1 = nr
926 ELSE IF ( wntus .OR. wntuf) THEN
927 n1 = n
928 ELSE IF ( wntua ) THEN
929 n1 = m
930 END IF
931*
932 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
933*.......................................................................
934* .. only the singular values are requested
935*.......................................................................
936 IF ( rtrans ) THEN
937*
938* .. compute the singular values of R**T = [A](1:NR,1:N)**T
939* .. set the lower triangle of [A] to [A](1:NR,1:N)**T and
940* the upper triangle of [A] to zero.
941 DO 1146 p = 1, min( n, nr )
942 DO 1147 q = p + 1, n
943 a(q,p) = a(p,q)
944 IF ( q .LE. nr ) a(p,q) = zero
945 1147 CONTINUE
946 1146 CONTINUE
947*
948 CALL sgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,
949 $ v, ldv, work, lwork, info )
950*
951 ELSE
952*
953* .. compute the singular values of R = [A](1:NR,1:N)
954*
955 IF ( nr .GT. 1 )
956 $ CALL slaset( 'L', nr-1,nr-1, zero,zero, a(2,1), lda )
957 CALL sgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,
958 $ v, ldv, work, lwork, info )
959*
960 END IF
961*
962 ELSE IF ( lsvec .AND. ( .NOT. rsvec) ) THEN
963*.......................................................................
964* .. the singular values and the left singular vectors requested
965*.......................................................................""""""""
966 IF ( rtrans ) THEN
967* .. apply SGESVD to R**T
968* .. copy R**T into [U] and overwrite [U] with the right singular
969* vectors of R
970 DO 1192 p = 1, nr
971 DO 1193 q = p, n
972 u(q,p) = a(p,q)
973 1193 CONTINUE
974 1192 CONTINUE
975 IF ( nr .GT. 1 )
976 $ CALL slaset( 'U', nr-1,nr-1, zero,zero, u(1,2), ldu )
977* .. the left singular vectors not computed, the NR right singular
978* vectors overwrite [U](1:NR,1:NR) as transposed. These
979* will be pre-multiplied by Q to build the left singular vectors of A.
980 CALL sgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,
981 $ u, ldu, work(n+1), lwork-n, info )
982*
983 DO 1119 p = 1, nr
984 DO 1120 q = p + 1, nr
985 rtmp = u(q,p)
986 u(q,p) = u(p,q)
987 u(p,q) = rtmp
988 1120 CONTINUE
989 1119 CONTINUE
990*
991 ELSE
992* .. apply SGESVD to R
993* .. copy R into [U] and overwrite [U] with the left singular vectors
994 CALL slacpy( 'U', nr, n, a, lda, u, ldu )
995 IF ( nr .GT. 1 )
996 $ CALL slaset( 'L', nr-1, nr-1, zero, zero, u(2,1), ldu )
997* .. the right singular vectors not computed, the NR left singular
998* vectors overwrite [U](1:NR,1:NR)
999 CALL sgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,
1000 $ v, ldv, work(n+1), lwork-n, info )
1001* .. now [U](1:NR,1:NR) contains the NR left singular vectors of
1002* R. These will be pre-multiplied by Q to build the left singular
1003* vectors of A.
1004 END IF
1005*
1006* .. assemble the left singular vector matrix U of dimensions
1007* (M x NR) or (M x N) or (M x M).
1008 IF ( ( nr .LT. m ) .AND. ( .NOT.wntuf ) ) THEN
1009 CALL slaset('A', m-nr, nr, zero, zero, u(nr+1,1), ldu)
1010 IF ( nr .LT. n1 ) THEN
1011 CALL slaset( 'A',nr,n1-nr,zero,zero,u(1,nr+1), ldu )
1012 CALL slaset( 'A',m-nr,n1-nr,zero,one,
1013 $ u(nr+1,nr+1), ldu )
1014 END IF
1015 END IF
1016*
1017* The Q matrix from the first QRF is built into the left singular
1018* vectors matrix U.
1019*
1020 IF ( .NOT.wntuf )
1021 $ CALL sormqr( 'L', 'N', m, n1, n, a, lda, work, u,
1022 $ ldu, work(n+1), lwork-n, ierr )
1023 IF ( rowprm .AND. .NOT.wntuf )
1024 $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(n+1), -1 )
1025*
1026 ELSE IF ( rsvec .AND. ( .NOT. lsvec ) ) THEN
1027*.......................................................................
1028* .. the singular values and the right singular vectors requested
1029*.......................................................................
1030 IF ( rtrans ) THEN
1031* .. apply SGESVD to R**T
1032* .. copy R**T into V and overwrite V with the left singular vectors
1033 DO 1165 p = 1, nr
1034 DO 1166 q = p, n
1035 v(q,p) = (a(p,q))
1036 1166 CONTINUE
1037 1165 CONTINUE
1038 IF ( nr .GT. 1 )
1039 $ CALL slaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv )
1040* .. the left singular vectors of R**T overwrite V, the right singular
1041* vectors not computed
1042 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1043 CALL sgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,
1044 $ u, ldu, work(n+1), lwork-n, info )
1045*
1046 DO 1121 p = 1, nr
1047 DO 1122 q = p + 1, nr
1048 rtmp = v(q,p)
1049 v(q,p) = v(p,q)
1050 v(p,q) = rtmp
1051 1122 CONTINUE
1052 1121 CONTINUE
1053*
1054 IF ( nr .LT. n ) THEN
1055 DO 1103 p = 1, nr
1056 DO 1104 q = nr + 1, n
1057 v(p,q) = v(q,p)
1058 1104 CONTINUE
1059 1103 CONTINUE
1060 END IF
1061 CALL slapmt( .false., nr, n, v, ldv, iwork )
1062 ELSE
1063* .. need all N right singular vectors and NR < N
1064* [!] This is simple implementation that augments [V](1:N,1:NR)
1065* by padding a zero block. In the case NR << N, a more efficient
1066* way is to first use the QR factorization. For more details
1067* how to implement this, see the " FULL SVD " branch.
1068 CALL slaset('G', n, n-nr, zero, zero, v(1,nr+1), ldv)
1069 CALL sgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,
1070 $ u, ldu, work(n+1), lwork-n, info )
1071*
1072 DO 1123 p = 1, n
1073 DO 1124 q = p + 1, n
1074 rtmp = v(q,p)
1075 v(q,p) = v(p,q)
1076 v(p,q) = rtmp
1077 1124 CONTINUE
1078 1123 CONTINUE
1079 CALL slapmt( .false., n, n, v, ldv, iwork )
1080 END IF
1081*
1082 ELSE
1083* .. aply SGESVD to R
1084* .. copy R into V and overwrite V with the right singular vectors
1085 CALL slacpy( 'U', nr, n, a, lda, v, ldv )
1086 IF ( nr .GT. 1 )
1087 $ CALL slaset( 'L', nr-1, nr-1, zero, zero, v(2,1), ldv )
1088* .. the right singular vectors overwrite V, the NR left singular
1089* vectors stored in U(1:NR,1:NR)
1090 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1091 CALL sgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,
1092 $ v, ldv, work(n+1), lwork-n, info )
1093 CALL slapmt( .false., nr, n, v, ldv, iwork )
1094* .. now [V](1:NR,1:N) contains V(1:N,1:NR)**T
1095 ELSE
1096* .. need all N right singular vectors and NR < N
1097* [!] This is simple implementation that augments [V](1:NR,1:N)
1098* by padding a zero block. In the case NR << N, a more efficient
1099* way is to first use the LQ factorization. For more details
1100* how to implement this, see the " FULL SVD " branch.
1101 CALL slaset('G', n-nr, n, zero,zero, v(nr+1,1), ldv)
1102 CALL sgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,
1103 $ v, ldv, work(n+1), lwork-n, info )
1104 CALL slapmt( .false., n, n, v, ldv, iwork )
1105 END IF
1106* .. now [V] contains the transposed matrix of the right singular
1107* vectors of A.
1108 END IF
1109*
1110 ELSE
1111*.......................................................................
1112* .. FULL SVD requested
1113*.......................................................................
1114 IF ( rtrans ) THEN
1115*
1116* .. apply SGESVD to R**T [[this option is left for R&D&T]]
1117*
1118 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1119* .. copy R**T into [V] and overwrite [V] with the left singular
1120* vectors of R**T
1121 DO 1168 p = 1, nr
1122 DO 1169 q = p, n
1123 v(q,p) = a(p,q)
1124 1169 CONTINUE
1125 1168 CONTINUE
1126 IF ( nr .GT. 1 )
1127 $ CALL slaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv )
1128*
1129* .. the left singular vectors of R**T overwrite [V], the NR right
1130* singular vectors of R**T stored in [U](1:NR,1:NR) as transposed
1131 CALL sgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,
1132 $ u, ldu, work(n+1), lwork-n, info )
1133* .. assemble V
1134 DO 1115 p = 1, nr
1135 DO 1116 q = p + 1, nr
1136 rtmp = v(q,p)
1137 v(q,p) = v(p,q)
1138 v(p,q) = rtmp
1139 1116 CONTINUE
1140 1115 CONTINUE
1141 IF ( nr .LT. n ) THEN
1142 DO 1101 p = 1, nr
1143 DO 1102 q = nr+1, n
1144 v(p,q) = v(q,p)
1145 1102 CONTINUE
1146 1101 CONTINUE
1147 END IF
1148 CALL slapmt( .false., nr, n, v, ldv, iwork )
1149*
1150 DO 1117 p = 1, nr
1151 DO 1118 q = p + 1, nr
1152 rtmp = u(q,p)
1153 u(q,p) = u(p,q)
1154 u(p,q) = rtmp
1155 1118 CONTINUE
1156 1117 CONTINUE
1157*
1158 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1159 CALL slaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu)
1160 IF ( nr .LT. n1 ) THEN
1161 CALL slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1162 CALL slaset( 'A',m-nr,n1-nr,zero,one,
1163 $ u(nr+1,nr+1), ldu )
1164 END IF
1165 END IF
1166*
1167 ELSE
1168* .. need all N right singular vectors and NR < N
1169* .. copy R**T into [V] and overwrite [V] with the left singular
1170* vectors of R**T
1171* [[The optimal ratio N/NR for using QRF instead of padding
1172* with zeros. Here hard coded to 2; it must be at least
1173* two due to work space constraints.]]
1174* OPTRATIO = ILAENV(6, 'SGESVD', 'S' // 'O', NR,N,0,0)
1175* OPTRATIO = MAX( OPTRATIO, 2 )
1176 optratio = 2
1177 IF ( optratio*nr .GT. n ) THEN
1178 DO 1198 p = 1, nr
1179 DO 1199 q = p, n
1180 v(q,p) = a(p,q)
1181 1199 CONTINUE
1182 1198 CONTINUE
1183 IF ( nr .GT. 1 )
1184 $ CALL slaset('U',nr-1,nr-1, zero,zero, v(1,2),ldv)
1185*
1186 CALL slaset('A',n,n-nr,zero,zero,v(1,nr+1),ldv)
1187 CALL sgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,
1188 $ u, ldu, work(n+1), lwork-n, info )
1189*
1190 DO 1113 p = 1, n
1191 DO 1114 q = p + 1, n
1192 rtmp = v(q,p)
1193 v(q,p) = v(p,q)
1194 v(p,q) = rtmp
1195 1114 CONTINUE
1196 1113 CONTINUE
1197 CALL slapmt( .false., n, n, v, ldv, iwork )
1198* .. assemble the left singular vector matrix U of dimensions
1199* (M x N1), i.e. (M x N) or (M x M).
1200*
1201 DO 1111 p = 1, n
1202 DO 1112 q = p + 1, n
1203 rtmp = u(q,p)
1204 u(q,p) = u(p,q)
1205 u(p,q) = rtmp
1206 1112 CONTINUE
1207 1111 CONTINUE
1208*
1209 IF ( ( n .LT. m ) .AND. .NOT.(wntuf)) THEN
1210 CALL slaset('A',m-n,n,zero,zero,u(n+1,1),ldu)
1211 IF ( n .LT. n1 ) THEN
1212 CALL slaset('A',n,n1-n,zero,zero,u(1,n+1),ldu)
1213 CALL slaset('A',m-n,n1-n,zero,one,
1214 $ u(n+1,n+1), ldu )
1215 END IF
1216 END IF
1217 ELSE
1218* .. copy R**T into [U] and overwrite [U] with the right
1219* singular vectors of R
1220 DO 1196 p = 1, nr
1221 DO 1197 q = p, n
1222 u(q,nr+p) = a(p,q)
1223 1197 CONTINUE
1224 1196 CONTINUE
1225 IF ( nr .GT. 1 )
1226 $ CALL slaset('U',nr-1,nr-1,zero,zero,u(1,nr+2),ldu)
1227 CALL sgeqrf( n, nr, u(1,nr+1), ldu, work(n+1),
1228 $ work(n+nr+1), lwork-n-nr, ierr )
1229 DO 1143 p = 1, nr
1230 DO 1144 q = 1, n
1231 v(q,p) = u(p,nr+q)
1232 1144 CONTINUE
1233 1143 CONTINUE
1234 CALL slaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv)
1235 CALL sgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,
1236 $ v,ldv, work(n+nr+1),lwork-n-nr, info )
1237 CALL slaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv)
1238 CALL slaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv)
1239 CALL slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv)
1240 CALL sormqr('R','C', n, n, nr, u(1,nr+1), ldu,
1241 $ work(n+1),v,ldv,work(n+nr+1),lwork-n-nr,ierr)
1242 CALL slapmt( .false., n, n, v, ldv, iwork )
1243* .. assemble the left singular vector matrix U of dimensions
1244* (M x NR) or (M x N) or (M x M).
1245 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1246 CALL slaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu)
1247 IF ( nr .LT. n1 ) THEN
1248 CALL slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1249 CALL slaset( 'A',m-nr,n1-nr,zero,one,
1250 $ u(nr+1,nr+1),ldu)
1251 END IF
1252 END IF
1253 END IF
1254 END IF
1255*
1256 ELSE
1257*
1258* .. apply SGESVD to R [[this is the recommended option]]
1259*
1260 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1261* .. copy R into [V] and overwrite V with the right singular vectors
1262 CALL slacpy( 'U', nr, n, a, lda, v, ldv )
1263 IF ( nr .GT. 1 )
1264 $ CALL slaset( 'L', nr-1,nr-1, zero,zero, v(2,1), ldv )
1265* .. the right singular vectors of R overwrite [V], the NR left
1266* singular vectors of R stored in [U](1:NR,1:NR)
1267 CALL sgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,
1268 $ v, ldv, work(n+1), lwork-n, info )
1269 CALL slapmt( .false., nr, n, v, ldv, iwork )
1270* .. now [V](1:NR,1:N) contains V(1:N,1:NR)**T
1271* .. assemble the left singular vector matrix U of dimensions
1272* (M x NR) or (M x N) or (M x M).
1273 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1274 CALL slaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu)
1275 IF ( nr .LT. n1 ) THEN
1276 CALL slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1277 CALL slaset( 'A',m-nr,n1-nr,zero,one,
1278 $ u(nr+1,nr+1), ldu )
1279 END IF
1280 END IF
1281*
1282 ELSE
1283* .. need all N right singular vectors and NR < N
1284* .. the requested number of the left singular vectors
1285* is then N1 (N or M)
1286* [[The optimal ratio N/NR for using LQ instead of padding
1287* with zeros. Here hard coded to 2; it must be at least
1288* two due to work space constraints.]]
1289* OPTRATIO = ILAENV(6, 'SGESVD', 'S' // 'O', NR,N,0,0)
1290* OPTRATIO = MAX( OPTRATIO, 2 )
1291 optratio = 2
1292 IF ( optratio * nr .GT. n ) THEN
1293 CALL slacpy( 'U', nr, n, a, lda, v, ldv )
1294 IF ( nr .GT. 1 )
1295 $ CALL slaset('L', nr-1,nr-1, zero,zero, v(2,1),ldv)
1296* .. the right singular vectors of R overwrite [V], the NR left
1297* singular vectors of R stored in [U](1:NR,1:NR)
1298 CALL slaset('A', n-nr,n, zero,zero, v(nr+1,1),ldv)
1299 CALL sgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,
1300 $ v, ldv, work(n+1), lwork-n, info )
1301 CALL slapmt( .false., n, n, v, ldv, iwork )
1302* .. now [V] contains the transposed matrix of the right
1303* singular vectors of A. The leading N left singular vectors
1304* are in [U](1:N,1:N)
1305* .. assemble the left singular vector matrix U of dimensions
1306* (M x N1), i.e. (M x N) or (M x M).
1307 IF ( ( n .LT. m ) .AND. .NOT.(wntuf)) THEN
1308 CALL slaset('A',m-n,n,zero,zero,u(n+1,1),ldu)
1309 IF ( n .LT. n1 ) THEN
1310 CALL slaset('A',n,n1-n,zero,zero,u(1,n+1),ldu)
1311 CALL slaset( 'A',m-n,n1-n,zero,one,
1312 $ u(n+1,n+1), ldu )
1313 END IF
1314 END IF
1315 ELSE
1316 CALL slacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu )
1317 IF ( nr .GT. 1 )
1318 $ CALL slaset('L',nr-1,nr-1,zero,zero,u(nr+2,1),ldu)
1319 CALL sgelqf( nr, n, u(nr+1,1), ldu, work(n+1),
1320 $ work(n+nr+1), lwork-n-nr, ierr )
1321 CALL slacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv)
1322 IF ( nr .GT. 1 )
1323 $ CALL slaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv)
1324 CALL sgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,
1325 $ v, ldv, work(n+nr+1), lwork-n-nr, info )
1326 CALL slaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv)
1327 CALL slaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv)
1328 CALL slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv)
1329 CALL sormlq('R','N',n,n,nr,u(nr+1,1),ldu,work(n+1),
1330 $ v, ldv, work(n+nr+1),lwork-n-nr,ierr)
1331 CALL slapmt( .false., n, n, v, ldv, iwork )
1332* .. assemble the left singular vector matrix U of dimensions
1333* (M x NR) or (M x N) or (M x M).
1334 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1335 CALL slaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu)
1336 IF ( nr .LT. n1 ) THEN
1337 CALL slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1338 CALL slaset( 'A',m-nr,n1-nr,zero,one,
1339 $ u(nr+1,nr+1), ldu )
1340 END IF
1341 END IF
1342 END IF
1343 END IF
1344* .. end of the "R**T or R" branch
1345 END IF
1346*
1347* The Q matrix from the first QRF is built into the left singular
1348* vectors matrix U.
1349*
1350 IF ( .NOT. wntuf )
1351 $ CALL sormqr( 'L', 'N', m, n1, n, a, lda, work, u,
1352 $ ldu, work(n+1), lwork-n, ierr )
1353 IF ( rowprm .AND. .NOT.wntuf )
1354 $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(n+1), -1 )
1355*
1356* ... end of the "full SVD" branch
1357 END IF
1358*
1359* Check whether some singular values are returned as zeros, e.g.
1360* due to underflow, and update the numerical rank.
1361 p = nr
1362 DO 4001 q = p, 1, -1
1363 IF ( s(q) .GT. zero ) GO TO 4002
1364 nr = nr - 1
1365 4001 CONTINUE
1366 4002 CONTINUE
1367*
1368* .. if numerical rank deficiency is detected, the truncated
1369* singular values are set to zero.
1370 IF ( nr .LT. n ) CALL slaset( 'G', n-nr,1, zero,zero, s(nr+1), n )
1371* .. undo scaling; this may cause overflow in the largest singular
1372* values.
1373 IF ( ascaled )
1374 $ CALL slascl( 'G',0,0, one,sqrt(real(m)), nr,1, s, n, ierr )
1375 IF ( conda ) rwork(1) = sconda
1376 rwork(2) = p - nr
1377* .. p-NR is the number of singular values that are computed as
1378* exact zeros in SGESVD() applied to the (possibly truncated)
1379* full row rank triangular (trapezoidal) factor of A.
1380 numrank = nr
1381*
1382 RETURN
1383*
1384* End of SGESVDQ
1385*
subroutine sgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info)
SGESVD computes the singular value decomposition (SVD) for GE matrices
Definition sgesvd.f:211
subroutine slapmt(forwrd, m, n, x, ldx, k)
SLAPMT performs a forward or backward permutation of the columns of a matrix.
Definition slapmt.f:104

◆ sgesvdx()

subroutine sgesvdx ( character jobu,
character jobvt,
character range,
integer m,
integer n,
real, dimension( lda, * ) a,
integer lda,
real vl,
real vu,
integer il,
integer iu,
integer ns,
real, dimension( * ) s,
real, dimension( ldu, * ) u,
integer ldu,
real, dimension( ldvt, * ) vt,
integer ldvt,
real, dimension( * ) work,
integer lwork,
integer, dimension( * ) iwork,
integer info )

SGESVDX computes the singular value decomposition (SVD) for GE matrices

Download SGESVDX + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!>  SGESVDX computes the singular value decomposition (SVD) of a real
!>  M-by-N matrix A, optionally computing the left and/or right singular
!>  vectors. The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!>  where SIGMA is an M-by-N matrix which is zero except for its
!>  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
!>  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
!>  are the singular values of A; they are real and non-negative, and
!>  are returned in descending order.  The first min(m,n) columns of
!>  U and V are the left and right singular vectors of A.
!>
!>  SGESVDX uses an eigenvalue problem for obtaining the SVD, which
!>  allows for the computation of a subset of singular values and
!>  vectors. See SBDSVDX for details.
!>
!>  Note that the routine returns V**T, not V.
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'V':  the first min(m,n) columns of U (the left singular
!>                  vectors) or as specified by RANGE are returned in
!>                  the array U;
!>          = 'N':  no columns of U (no left singular vectors) are
!>                  computed.
!> 
[in]JOBVT
!>          JOBVT is CHARACTER*1
!>           Specifies options for computing all or part of the matrix
!>           V**T:
!>           = 'V':  the first min(m,n) rows of V**T (the right singular
!>                   vectors) or as specified by RANGE are returned in
!>                   the array VT;
!>           = 'N':  no rows of V**T (no right singular vectors) are
!>                   computed.
!> 
[in]RANGE
!>          RANGE is CHARACTER*1
!>          = 'A': all singular values will be found.
!>          = 'V': all singular values in the half-open interval (VL,VU]
!>                 will be found.
!>          = 'I': the IL-th through IU-th singular values will be found.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is REAL array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit, the contents of A are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[in]VL
!>          VL is REAL
!>          If RANGE='V', the lower bound of the interval to
!>          be searched for singular values. VU > VL.
!>          Not referenced if RANGE = 'A' or 'I'.
!> 
[in]VU
!>          VU is REAL
!>          If RANGE='V', the upper bound of the interval to
!>          be searched for singular values. VU > VL.
!>          Not referenced if RANGE = 'A' or 'I'.
!> 
[in]IL
!>          IL is INTEGER
!>          If RANGE='I', the index of the
!>          smallest singular value to be returned.
!>          1 <= IL <= IU <= min(M,N), if min(M,N) > 0.
!>          Not referenced if RANGE = 'A' or 'V'.
!> 
[in]IU
!>          IU is INTEGER
!>          If RANGE='I', the index of the
!>          largest singular value to be returned.
!>          1 <= IL <= IU <= min(M,N), if min(M,N) > 0.
!>          Not referenced if RANGE = 'A' or 'V'.
!> 
[out]NS
!>          NS is INTEGER
!>          The total number of singular values found,
!>          0 <= NS <= min(M,N).
!>          If RANGE = 'A', NS = min(M,N); if RANGE = 'I', NS = IU-IL+1.
!> 
[out]S
!>          S is REAL array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is REAL array, dimension (LDU,UCOL)
!>          If JOBU = 'V', U contains columns of U (the left singular
!>          vectors, stored columnwise) as specified by RANGE; if
!>          JOBU = 'N', U is not referenced.
!>          Note: The user must ensure that UCOL >= NS; if RANGE = 'V',
!>          the exact value of NS is not known in advance and an upper
!>          bound must be used.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBU = 'V', LDU >= M.
!> 
[out]VT
!>          VT is REAL array, dimension (LDVT,N)
!>          If JOBVT = 'V', VT contains the rows of V**T (the right singular
!>          vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N',
!>          VT is not referenced.
!>          Note: The user must ensure that LDVT >= NS; if RANGE = 'V',
!>          the exact value of NS is not known in advance and an upper
!>          bound must be used.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1; if
!>          JOBVT = 'V', LDVT >= NS (see above).
!> 
[out]WORK
!>          WORK is REAL array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see
!>          comments inside the code):
!>             - PATH 1  (M much larger than N)
!>             - PATH 1t (N much larger than M)
!>          LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths.
!>          For good performance, LWORK should generally be larger.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (12*MIN(M,N))
!>          If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0,
!>          then IWORK contains the indices of the eigenvectors that failed
!>          to converge in SBDSVDX/SSTEVX.
!> 
[out]INFO
!>     INFO is INTEGER
!>           = 0:  successful exit
!>           < 0:  if INFO = -i, the i-th argument had an illegal value
!>           > 0:  if INFO = i, then i eigenvectors failed to converge
!>                 in SBDSVDX/SSTEVX.
!>                 if INFO = N*2 + 1, an internal error occurred in
!>                 SBDSVDX
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 260 of file sgesvdx.f.

263*
264* -- LAPACK driver routine --
265* -- LAPACK is a software package provided by Univ. of Tennessee, --
266* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
267*
268* .. Scalar Arguments ..
269 CHARACTER JOBU, JOBVT, RANGE
270 INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS
271 REAL VL, VU
272* ..
273* .. Array Arguments ..
274 INTEGER IWORK( * )
275 REAL A( LDA, * ), S( * ), U( LDU, * ),
276 $ VT( LDVT, * ), WORK( * )
277* ..
278*
279* =====================================================================
280*
281* .. Parameters ..
282 REAL ZERO, ONE
283 parameter( zero = 0.0e0, one = 1.0e0 )
284* ..
285* .. Local Scalars ..
286 CHARACTER JOBZ, RNGTGK
287 LOGICAL ALLS, INDS, LQUERY, VALS, WANTU, WANTVT
288 INTEGER I, ID, IE, IERR, ILQF, ILTGK, IQRF, ISCL,
289 $ ITAU, ITAUP, ITAUQ, ITEMP, ITGKZ, IUTGK,
290 $ J, MAXWRK, MINMN, MINWRK, MNTHR
291 REAL ABSTOL, ANRM, BIGNUM, EPS, SMLNUM
292* ..
293* .. Local Arrays ..
294 REAL DUM( 1 )
295* ..
296* .. External Subroutines ..
297 EXTERNAL sbdsvdx, sgebrd, sgelqf, sgeqrf, slacpy,
299 $ scopy, xerbla
300* ..
301* .. External Functions ..
302 LOGICAL LSAME
303 INTEGER ILAENV
304 REAL SLAMCH, SLANGE
305 EXTERNAL lsame, ilaenv, slamch, slange
306* ..
307* .. Intrinsic Functions ..
308 INTRINSIC max, min, sqrt
309* ..
310* .. Executable Statements ..
311*
312* Test the input arguments.
313*
314 ns = 0
315 info = 0
316 abstol = 2*slamch('S')
317 lquery = ( lwork.EQ.-1 )
318 minmn = min( m, n )
319
320 wantu = lsame( jobu, 'V' )
321 wantvt = lsame( jobvt, 'V' )
322 IF( wantu .OR. wantvt ) THEN
323 jobz = 'V'
324 ELSE
325 jobz = 'N'
326 END IF
327 alls = lsame( range, 'A' )
328 vals = lsame( range, 'V' )
329 inds = lsame( range, 'I' )
330*
331 info = 0
332 IF( .NOT.lsame( jobu, 'V' ) .AND.
333 $ .NOT.lsame( jobu, 'N' ) ) THEN
334 info = -1
335 ELSE IF( .NOT.lsame( jobvt, 'V' ) .AND.
336 $ .NOT.lsame( jobvt, 'N' ) ) THEN
337 info = -2
338 ELSE IF( .NOT.( alls .OR. vals .OR. inds ) ) THEN
339 info = -3
340 ELSE IF( m.LT.0 ) THEN
341 info = -4
342 ELSE IF( n.LT.0 ) THEN
343 info = -5
344 ELSE IF( m.GT.lda ) THEN
345 info = -7
346 ELSE IF( minmn.GT.0 ) THEN
347 IF( vals ) THEN
348 IF( vl.LT.zero ) THEN
349 info = -8
350 ELSE IF( vu.LE.vl ) THEN
351 info = -9
352 END IF
353 ELSE IF( inds ) THEN
354 IF( il.LT.1 .OR. il.GT.max( 1, minmn ) ) THEN
355 info = -10
356 ELSE IF( iu.LT.min( minmn, il ) .OR. iu.GT.minmn ) THEN
357 info = -11
358 END IF
359 END IF
360 IF( info.EQ.0 ) THEN
361 IF( wantu .AND. ldu.LT.m ) THEN
362 info = -15
363 ELSE IF( wantvt ) THEN
364 IF( inds ) THEN
365 IF( ldvt.LT.iu-il+1 ) THEN
366 info = -17
367 END IF
368 ELSE IF( ldvt.LT.minmn ) THEN
369 info = -17
370 END IF
371 END IF
372 END IF
373 END IF
374*
375* Compute workspace
376* (Note: Comments in the code beginning "Workspace:" describe the
377* minimal amount of workspace needed at that point in the code,
378* as well as the preferred amount for good performance.
379* NB refers to the optimal block size for the immediately
380* following subroutine, as returned by ILAENV.)
381*
382 IF( info.EQ.0 ) THEN
383 minwrk = 1
384 maxwrk = 1
385 IF( minmn.GT.0 ) THEN
386 IF( m.GE.n ) THEN
387 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
388 IF( m.GE.mnthr ) THEN
389*
390* Path 1 (M much larger than N)
391*
392 maxwrk = n +
393 $ n*ilaenv( 1, 'SGEQRF', ' ', m, n, -1, -1 )
394 maxwrk = max( maxwrk, n*(n+5) + 2*n*
395 $ ilaenv( 1, 'SGEBRD', ' ', n, n, -1, -1 ) )
396 IF (wantu) THEN
397 maxwrk = max(maxwrk,n*(n*3+6)+n*
398 $ ilaenv( 1, 'SORMQR', ' ', n, n, -1, -1 ) )
399 END IF
400 IF (wantvt) THEN
401 maxwrk = max(maxwrk,n*(n*3+6)+n*
402 $ ilaenv( 1, 'SORMLQ', ' ', n, n, -1, -1 ) )
403 END IF
404 minwrk = n*(n*3+20)
405 ELSE
406*
407* Path 2 (M at least N, but not much larger)
408*
409 maxwrk = 4*n + ( m+n )*
410 $ ilaenv( 1, 'SGEBRD', ' ', m, n, -1, -1 )
411 IF (wantu) THEN
412 maxwrk = max(maxwrk,n*(n*2+5)+n*
413 $ ilaenv( 1, 'SORMQR', ' ', n, n, -1, -1 ) )
414 END IF
415 IF (wantvt) THEN
416 maxwrk = max(maxwrk,n*(n*2+5)+n*
417 $ ilaenv( 1, 'SORMLQ', ' ', n, n, -1, -1 ) )
418 END IF
419 minwrk = max(n*(n*2+19),4*n+m)
420 END IF
421 ELSE
422 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
423 IF( n.GE.mnthr ) THEN
424*
425* Path 1t (N much larger than M)
426*
427 maxwrk = m +
428 $ m*ilaenv( 1, 'SGELQF', ' ', m, n, -1, -1 )
429 maxwrk = max( maxwrk, m*(m+5) + 2*m*
430 $ ilaenv( 1, 'SGEBRD', ' ', m, m, -1, -1 ) )
431 IF (wantu) THEN
432 maxwrk = max(maxwrk,m*(m*3+6)+m*
433 $ ilaenv( 1, 'SORMQR', ' ', m, m, -1, -1 ) )
434 END IF
435 IF (wantvt) THEN
436 maxwrk = max(maxwrk,m*(m*3+6)+m*
437 $ ilaenv( 1, 'SORMLQ', ' ', m, m, -1, -1 ) )
438 END IF
439 minwrk = m*(m*3+20)
440 ELSE
441*
442* Path 2t (N at least M, but not much larger)
443*
444 maxwrk = 4*m + ( m+n )*
445 $ ilaenv( 1, 'SGEBRD', ' ', m, n, -1, -1 )
446 IF (wantu) THEN
447 maxwrk = max(maxwrk,m*(m*2+5)+m*
448 $ ilaenv( 1, 'SORMQR', ' ', m, m, -1, -1 ) )
449 END IF
450 IF (wantvt) THEN
451 maxwrk = max(maxwrk,m*(m*2+5)+m*
452 $ ilaenv( 1, 'SORMLQ', ' ', m, m, -1, -1 ) )
453 END IF
454 minwrk = max(m*(m*2+19),4*m+n)
455 END IF
456 END IF
457 END IF
458 maxwrk = max( maxwrk, minwrk )
459 work( 1 ) = real( maxwrk )
460*
461 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
462 info = -19
463 END IF
464 END IF
465*
466 IF( info.NE.0 ) THEN
467 CALL xerbla( 'SGESVDX', -info )
468 RETURN
469 ELSE IF( lquery ) THEN
470 RETURN
471 END IF
472*
473* Quick return if possible
474*
475 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
476 RETURN
477 END IF
478*
479* Set singular values indices accord to RANGE.
480*
481 IF( alls ) THEN
482 rngtgk = 'I'
483 iltgk = 1
484 iutgk = min( m, n )
485 ELSE IF( inds ) THEN
486 rngtgk = 'I'
487 iltgk = il
488 iutgk = iu
489 ELSE
490 rngtgk = 'V'
491 iltgk = 0
492 iutgk = 0
493 END IF
494*
495* Get machine constants
496*
497 eps = slamch( 'P' )
498 smlnum = sqrt( slamch( 'S' ) ) / eps
499 bignum = one / smlnum
500*
501* Scale A if max element outside range [SMLNUM,BIGNUM]
502*
503 anrm = slange( 'M', m, n, a, lda, dum )
504 iscl = 0
505 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
506 iscl = 1
507 CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, info )
508 ELSE IF( anrm.GT.bignum ) THEN
509 iscl = 1
510 CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info )
511 END IF
512*
513 IF( m.GE.n ) THEN
514*
515* A has at least as many rows as columns. If A has sufficiently
516* more rows than columns, first reduce A using the QR
517* decomposition.
518*
519 IF( m.GE.mnthr ) THEN
520*
521* Path 1 (M much larger than N):
522* A = Q * R = Q * ( QB * B * PB**T )
523* = Q * ( QB * ( UB * S * VB**T ) * PB**T )
524* U = Q * QB * UB; V**T = VB**T * PB**T
525*
526* Compute A=Q*R
527* (Workspace: need 2*N, prefer N+N*NB)
528*
529 itau = 1
530 itemp = itau + n
531 CALL sgeqrf( m, n, a, lda, work( itau ), work( itemp ),
532 $ lwork-itemp+1, info )
533*
534* Copy R into WORK and bidiagonalize it:
535* (Workspace: need N*N+5*N, prefer N*N+4*N+2*N*NB)
536*
537 iqrf = itemp
538 id = iqrf + n*n
539 ie = id + n
540 itauq = ie + n
541 itaup = itauq + n
542 itemp = itaup + n
543 CALL slacpy( 'U', n, n, a, lda, work( iqrf ), n )
544 CALL slaset( 'L', n-1, n-1, zero, zero, work( iqrf+1 ), n )
545 CALL sgebrd( n, n, work( iqrf ), n, work( id ), work( ie ),
546 $ work( itauq ), work( itaup ), work( itemp ),
547 $ lwork-itemp+1, info )
548*
549* Solve eigenvalue problem TGK*Z=Z*S.
550* (Workspace: need 14*N + 2*N*(N+1))
551*
552 itgkz = itemp
553 itemp = itgkz + n*(n*2+1)
554 CALL sbdsvdx( 'U', jobz, rngtgk, n, work( id ), work( ie ),
555 $ vl, vu, iltgk, iutgk, ns, s, work( itgkz ),
556 $ n*2, work( itemp ), iwork, info)
557*
558* If needed, compute left singular vectors.
559*
560 IF( wantu ) THEN
561 j = itgkz
562 DO i = 1, ns
563 CALL scopy( n, work( j ), 1, u( 1,i ), 1 )
564 j = j + n*2
565 END DO
566 CALL slaset( 'A', m-n, ns, zero, zero, u( n+1,1 ), ldu )
567*
568* Call SORMBR to compute QB*UB.
569* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
570*
571 CALL sormbr( 'Q', 'L', 'N', n, ns, n, work( iqrf ), n,
572 $ work( itauq ), u, ldu, work( itemp ),
573 $ lwork-itemp+1, info )
574*
575* Call SORMQR to compute Q*(QB*UB).
576* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
577*
578 CALL sormqr( 'L', 'N', m, ns, n, a, lda,
579 $ work( itau ), u, ldu, work( itemp ),
580 $ lwork-itemp+1, info )
581 END IF
582*
583* If needed, compute right singular vectors.
584*
585 IF( wantvt) THEN
586 j = itgkz + n
587 DO i = 1, ns
588 CALL scopy( n, work( j ), 1, vt( i,1 ), ldvt )
589 j = j + n*2
590 END DO
591*
592* Call SORMBR to compute VB**T * PB**T
593* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
594*
595 CALL sormbr( 'P', 'R', 'T', ns, n, n, work( iqrf ), n,
596 $ work( itaup ), vt, ldvt, work( itemp ),
597 $ lwork-itemp+1, info )
598 END IF
599 ELSE
600*
601* Path 2 (M at least N, but not much larger)
602* Reduce A to bidiagonal form without QR decomposition
603* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T
604* U = QB * UB; V**T = VB**T * PB**T
605*
606* Bidiagonalize A
607* (Workspace: need 4*N+M, prefer 4*N+(M+N)*NB)
608*
609 id = 1
610 ie = id + n
611 itauq = ie + n
612 itaup = itauq + n
613 itemp = itaup + n
614 CALL sgebrd( m, n, a, lda, work( id ), work( ie ),
615 $ work( itauq ), work( itaup ), work( itemp ),
616 $ lwork-itemp+1, info )
617*
618* Solve eigenvalue problem TGK*Z=Z*S.
619* (Workspace: need 14*N + 2*N*(N+1))
620*
621 itgkz = itemp
622 itemp = itgkz + n*(n*2+1)
623 CALL sbdsvdx( 'U', jobz, rngtgk, n, work( id ), work( ie ),
624 $ vl, vu, iltgk, iutgk, ns, s, work( itgkz ),
625 $ n*2, work( itemp ), iwork, info)
626*
627* If needed, compute left singular vectors.
628*
629 IF( wantu ) THEN
630 j = itgkz
631 DO i = 1, ns
632 CALL scopy( n, work( j ), 1, u( 1,i ), 1 )
633 j = j + n*2
634 END DO
635 CALL slaset( 'A', m-n, ns, zero, zero, u( n+1,1 ), ldu )
636*
637* Call SORMBR to compute QB*UB.
638* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
639*
640 CALL sormbr( 'Q', 'L', 'N', m, ns, n, a, lda,
641 $ work( itauq ), u, ldu, work( itemp ),
642 $ lwork-itemp+1, ierr )
643 END IF
644*
645* If needed, compute right singular vectors.
646*
647 IF( wantvt) THEN
648 j = itgkz + n
649 DO i = 1, ns
650 CALL scopy( n, work( j ), 1, vt( i,1 ), ldvt )
651 j = j + n*2
652 END DO
653*
654* Call SORMBR to compute VB**T * PB**T
655* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
656*
657 CALL sormbr( 'P', 'R', 'T', ns, n, n, a, lda,
658 $ work( itaup ), vt, ldvt, work( itemp ),
659 $ lwork-itemp+1, ierr )
660 END IF
661 END IF
662 ELSE
663*
664* A has more columns than rows. If A has sufficiently more
665* columns than rows, first reduce A using the LQ decomposition.
666*
667 IF( n.GE.mnthr ) THEN
668*
669* Path 1t (N much larger than M):
670* A = L * Q = ( QB * B * PB**T ) * Q
671* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q
672* U = QB * UB ; V**T = VB**T * PB**T * Q
673*
674* Compute A=L*Q
675* (Workspace: need 2*M, prefer M+M*NB)
676*
677 itau = 1
678 itemp = itau + m
679 CALL sgelqf( m, n, a, lda, work( itau ), work( itemp ),
680 $ lwork-itemp+1, info )
681
682* Copy L into WORK and bidiagonalize it:
683* (Workspace in WORK( ITEMP ): need M*M+5*N, prefer M*M+4*M+2*M*NB)
684*
685 ilqf = itemp
686 id = ilqf + m*m
687 ie = id + m
688 itauq = ie + m
689 itaup = itauq + m
690 itemp = itaup + m
691 CALL slacpy( 'L', m, m, a, lda, work( ilqf ), m )
692 CALL slaset( 'U', m-1, m-1, zero, zero, work( ilqf+m ), m )
693 CALL sgebrd( m, m, work( ilqf ), m, work( id ), work( ie ),
694 $ work( itauq ), work( itaup ), work( itemp ),
695 $ lwork-itemp+1, info )
696*
697* Solve eigenvalue problem TGK*Z=Z*S.
698* (Workspace: need 2*M*M+14*M)
699*
700 itgkz = itemp
701 itemp = itgkz + m*(m*2+1)
702 CALL sbdsvdx( 'U', jobz, rngtgk, m, work( id ), work( ie ),
703 $ vl, vu, iltgk, iutgk, ns, s, work( itgkz ),
704 $ m*2, work( itemp ), iwork, info)
705*
706* If needed, compute left singular vectors.
707*
708 IF( wantu ) THEN
709 j = itgkz
710 DO i = 1, ns
711 CALL scopy( m, work( j ), 1, u( 1,i ), 1 )
712 j = j + m*2
713 END DO
714*
715* Call SORMBR to compute QB*UB.
716* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
717*
718 CALL sormbr( 'Q', 'L', 'N', m, ns, m, work( ilqf ), m,
719 $ work( itauq ), u, ldu, work( itemp ),
720 $ lwork-itemp+1, info )
721 END IF
722*
723* If needed, compute right singular vectors.
724*
725 IF( wantvt) THEN
726 j = itgkz + m
727 DO i = 1, ns
728 CALL scopy( m, work( j ), 1, vt( i,1 ), ldvt )
729 j = j + m*2
730 END DO
731 CALL slaset( 'A', ns, n-m, zero, zero, vt( 1,m+1 ), ldvt)
732*
733* Call SORMBR to compute (VB**T)*(PB**T)
734* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
735*
736 CALL sormbr( 'P', 'R', 'T', ns, m, m, work( ilqf ), m,
737 $ work( itaup ), vt, ldvt, work( itemp ),
738 $ lwork-itemp+1, info )
739*
740* Call SORMLQ to compute ((VB**T)*(PB**T))*Q.
741* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
742*
743 CALL sormlq( 'R', 'N', ns, n, m, a, lda,
744 $ work( itau ), vt, ldvt, work( itemp ),
745 $ lwork-itemp+1, info )
746 END IF
747 ELSE
748*
749* Path 2t (N greater than M, but not much larger)
750* Reduce to bidiagonal form without LQ decomposition
751* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T
752* U = QB * UB; V**T = VB**T * PB**T
753*
754* Bidiagonalize A
755* (Workspace: need 4*M+N, prefer 4*M+(M+N)*NB)
756*
757 id = 1
758 ie = id + m
759 itauq = ie + m
760 itaup = itauq + m
761 itemp = itaup + m
762 CALL sgebrd( m, n, a, lda, work( id ), work( ie ),
763 $ work( itauq ), work( itaup ), work( itemp ),
764 $ lwork-itemp+1, info )
765*
766* Solve eigenvalue problem TGK*Z=Z*S.
767* (Workspace: need 2*M*M+14*M)
768*
769 itgkz = itemp
770 itemp = itgkz + m*(m*2+1)
771 CALL sbdsvdx( 'L', jobz, rngtgk, m, work( id ), work( ie ),
772 $ vl, vu, iltgk, iutgk, ns, s, work( itgkz ),
773 $ m*2, work( itemp ), iwork, info)
774*
775* If needed, compute left singular vectors.
776*
777 IF( wantu ) THEN
778 j = itgkz
779 DO i = 1, ns
780 CALL scopy( m, work( j ), 1, u( 1,i ), 1 )
781 j = j + m*2
782 END DO
783*
784* Call SORMBR to compute QB*UB.
785* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
786*
787 CALL sormbr( 'Q', 'L', 'N', m, ns, n, a, lda,
788 $ work( itauq ), u, ldu, work( itemp ),
789 $ lwork-itemp+1, info )
790 END IF
791*
792* If needed, compute right singular vectors.
793*
794 IF( wantvt) THEN
795 j = itgkz + m
796 DO i = 1, ns
797 CALL scopy( m, work( j ), 1, vt( i,1 ), ldvt )
798 j = j + m*2
799 END DO
800 CALL slaset( 'A', ns, n-m, zero, zero, vt( 1,m+1 ), ldvt)
801*
802* Call SORMBR to compute VB**T * PB**T
803* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
804*
805 CALL sormbr( 'P', 'R', 'T', ns, n, m, a, lda,
806 $ work( itaup ), vt, ldvt, work( itemp ),
807 $ lwork-itemp+1, info )
808 END IF
809 END IF
810 END IF
811*
812* Undo scaling if necessary
813*
814 IF( iscl.EQ.1 ) THEN
815 IF( anrm.GT.bignum )
816 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1,
817 $ s, minmn, info )
818 IF( anrm.LT.smlnum )
819 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1,
820 $ s, minmn, info )
821 END IF
822*
823* Return optimal workspace in WORK(1)
824*
825 work( 1 ) = real( maxwrk )
826*
827 RETURN
828*
829* End of SGESVDX
830*
subroutine sbdsvdx(uplo, jobz, range, n, d, e, vl, vu, il, iu, ns, s, z, ldz, work, iwork, info)
SBDSVDX
Definition sbdsvdx.f:226
initmumps id

◆ sggsvd3()

subroutine sggsvd3 ( character jobu,
character jobv,
character jobq,
integer m,
integer n,
integer p,
integer k,
integer l,
real, dimension( lda, * ) a,
integer lda,
real, dimension( ldb, * ) b,
integer ldb,
real, dimension( * ) alpha,
real, dimension( * ) beta,
real, dimension( ldu, * ) u,
integer ldu,
real, dimension( ldv, * ) v,
integer ldv,
real, dimension( ldq, * ) q,
integer ldq,
real, dimension( * ) work,
integer lwork,
integer, dimension( * ) iwork,
integer info )

SGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices

Download SGGSVD3 + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> SGGSVD3 computes the generalized singular value decomposition (GSVD)
!> of an M-by-N real matrix A and P-by-N real matrix B:
!>
!>       U**T*A*Q = D1*( 0 R ),    V**T*B*Q = D2*( 0 R )
!>
!> where U, V and Q are orthogonal matrices.
!> Let K+L = the effective numerical rank of the matrix (A**T,B**T)**T,
!> then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and
!> D2 are M-by-(K+L) and P-by-(K+L)  matrices and of the
!> following structures, respectively:
!>
!> If M-K-L >= 0,
!>
!>                     K  L
!>        D1 =     K ( I  0 )
!>                 L ( 0  C )
!>             M-K-L ( 0  0 )
!>
!>                   K  L
!>        D2 =   L ( 0  S )
!>             P-L ( 0  0 )
!>
!>                 N-K-L  K    L
!>   ( 0 R ) = K (  0   R11  R12 )
!>             L (  0    0   R22 )
!>
!> where
!>
!>   C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
!>   S = diag( BETA(K+1),  ... , BETA(K+L) ),
!>   C**2 + S**2 = I.
!>
!>   R is stored in A(1:K+L,N-K-L+1:N) on exit.
!>
!> If M-K-L < 0,
!>
!>                   K M-K K+L-M
!>        D1 =   K ( I  0    0   )
!>             M-K ( 0  C    0   )
!>
!>                     K M-K K+L-M
!>        D2 =   M-K ( 0  S    0  )
!>             K+L-M ( 0  0    I  )
!>               P-L ( 0  0    0  )
!>
!>                    N-K-L  K   M-K  K+L-M
!>   ( 0 R ) =     K ( 0    R11  R12  R13  )
!>               M-K ( 0     0   R22  R23  )
!>             K+L-M ( 0     0    0   R33  )
!>
!> where
!>
!>   C = diag( ALPHA(K+1), ... , ALPHA(M) ),
!>   S = diag( BETA(K+1),  ... , BETA(M) ),
!>   C**2 + S**2 = I.
!>
!>   (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored
!>   ( 0  R22 R23 )
!>   in B(M-K+1:L,N+M-K-L+1:N) on exit.
!>
!> The routine computes C, S, R, and optionally the orthogonal
!> transformation matrices U, V and Q.
!>
!> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of
!> A and B implicitly gives the SVD of A*inv(B):
!>                      A*inv(B) = U*(D1*inv(D2))*V**T.
!> If ( A**T,B**T)**T  has orthonormal columns, then the GSVD of A and B is
!> also equal to the CS decomposition of A and B. Furthermore, the GSVD
!> can be used to derive the solution of the eigenvalue problem:
!>                      A**T*A x = lambda* B**T*B x.
!> In some literature, the GSVD of A and B is presented in the form
!>                  U**T*A*X = ( 0 D1 ),   V**T*B*X = ( 0 D2 )
!> where U and V are orthogonal and X is nonsingular, D1 and D2 are
!> ``diagonal''.  The former GSVD form can be converted to the latter
!> form by taking the nonsingular matrix X as
!>
!>                      X = Q*( I   0    )
!>                            ( 0 inv(R) ).
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          = 'U':  Orthogonal matrix U is computed;
!>          = 'N':  U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>          = 'V':  Orthogonal matrix V is computed;
!>          = 'N':  V is not computed.
!> 
[in]JOBQ
!>          JOBQ is CHARACTER*1
!>          = 'Q':  Orthogonal matrix Q is computed;
!>          = 'N':  Q is not computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the matrices A and B.  N >= 0.
!> 
[in]P
!>          P is INTEGER
!>          The number of rows of the matrix B.  P >= 0.
!> 
[out]K
!>          K is INTEGER
!> 
[out]L
!>          L is INTEGER
!>
!>          On exit, K and L specify the dimension of the subblocks
!>          described in Purpose.
!>          K + L = effective numerical rank of (A**T,B**T)**T.
!> 
[in,out]A
!>          A is REAL array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit, A contains the triangular matrix R, or part of R.
!>          See Purpose for details.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A. LDA >= max(1,M).
!> 
[in,out]B
!>          B is REAL array, dimension (LDB,N)
!>          On entry, the P-by-N matrix B.
!>          On exit, B contains the triangular matrix R if M-K-L < 0.
!>          See Purpose for details.
!> 
[in]LDB
!>          LDB is INTEGER
!>          The leading dimension of the array B. LDB >= max(1,P).
!> 
[out]ALPHA
!>          ALPHA is REAL array, dimension (N)
!> 
[out]BETA
!>          BETA is REAL array, dimension (N)
!>
!>          On exit, ALPHA and BETA contain the generalized singular
!>          value pairs of A and B;
!>            ALPHA(1:K) = 1,
!>            BETA(1:K)  = 0,
!>          and if M-K-L >= 0,
!>            ALPHA(K+1:K+L) = C,
!>            BETA(K+1:K+L)  = S,
!>          or if M-K-L < 0,
!>            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0
!>            BETA(K+1:M) =S, BETA(M+1:K+L) =1
!>          and
!>            ALPHA(K+L+1:N) = 0
!>            BETA(K+L+1:N)  = 0
!> 
[out]U
!>          U is REAL array, dimension (LDU,M)
!>          If JOBU = 'U', U contains the M-by-M orthogonal matrix U.
!>          If JOBU = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U. LDU >= max(1,M) if
!>          JOBU = 'U'; LDU >= 1 otherwise.
!> 
[out]V
!>          V is REAL array, dimension (LDV,P)
!>          If JOBV = 'V', V contains the P-by-P orthogonal matrix V.
!>          If JOBV = 'N', V is not referenced.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V. LDV >= max(1,P) if
!>          JOBV = 'V'; LDV >= 1 otherwise.
!> 
[out]Q
!>          Q is REAL array, dimension (LDQ,N)
!>          If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q.
!>          If JOBQ = 'N', Q is not referenced.
!> 
[in]LDQ
!>          LDQ is INTEGER
!>          The leading dimension of the array Q. LDQ >= max(1,N) if
!>          JOBQ = 'Q'; LDQ >= 1 otherwise.
!> 
[out]WORK
!>          WORK is REAL array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (N)
!>          On exit, IWORK stores the sorting information. More
!>          precisely, the following loop will sort ALPHA
!>             for I = K+1, min(M,K+L)
!>                 swap ALPHA(I) and ALPHA(IWORK(I))
!>             endfor
!>          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N).
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if INFO = 1, the Jacobi-type procedure failed to
!>                converge.  For further details, see subroutine STGSJA.
!> 
Internal Parameters:
!>  TOLA    REAL
!>  TOLB    REAL
!>          TOLA and TOLB are the thresholds to determine the effective
!>          rank of (A**T,B**T)**T. Generally, they are set to
!>                   TOLA = MAX(M,N)*norm(A)*MACHEPS,
!>                   TOLB = MAX(P,N)*norm(B)*MACHEPS.
!>          The size of TOLA and TOLB may affect the size of backward
!>          errors of the decomposition.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA
Further Details:
SGGSVD3 replaces the deprecated subroutine SGGSVD.

Definition at line 346 of file sggsvd3.f.

349*
350* -- LAPACK driver routine --
351* -- LAPACK is a software package provided by Univ. of Tennessee, --
352* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
353*
354* .. Scalar Arguments ..
355 CHARACTER JOBQ, JOBU, JOBV
356 INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P,
357 $ LWORK
358* ..
359* .. Array Arguments ..
360 INTEGER IWORK( * )
361 REAL A( LDA, * ), ALPHA( * ), B( LDB, * ),
362 $ BETA( * ), Q( LDQ, * ), U( LDU, * ),
363 $ V( LDV, * ), WORK( * )
364* ..
365*
366* =====================================================================
367*
368* .. Local Scalars ..
369 LOGICAL WANTQ, WANTU, WANTV, LQUERY
370 INTEGER I, IBND, ISUB, J, NCYCLE, LWKOPT
371 REAL ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL
372* ..
373* .. External Functions ..
374 LOGICAL LSAME
375 REAL SLAMCH, SLANGE
376 EXTERNAL lsame, slamch, slange
377* ..
378* .. External Subroutines ..
379 EXTERNAL scopy, sggsvp3, stgsja, xerbla
380* ..
381* .. Intrinsic Functions ..
382 INTRINSIC max, min
383* ..
384* .. Executable Statements ..
385*
386* Decode and test the input parameters
387*
388 wantu = lsame( jobu, 'U' )
389 wantv = lsame( jobv, 'V' )
390 wantq = lsame( jobq, 'Q' )
391 lquery = ( lwork.EQ.-1 )
392 lwkopt = 1
393*
394* Test the input arguments
395*
396 info = 0
397 IF( .NOT.( wantu .OR. lsame( jobu, 'N' ) ) ) THEN
398 info = -1
399 ELSE IF( .NOT.( wantv .OR. lsame( jobv, 'N' ) ) ) THEN
400 info = -2
401 ELSE IF( .NOT.( wantq .OR. lsame( jobq, 'N' ) ) ) THEN
402 info = -3
403 ELSE IF( m.LT.0 ) THEN
404 info = -4
405 ELSE IF( n.LT.0 ) THEN
406 info = -5
407 ELSE IF( p.LT.0 ) THEN
408 info = -6
409 ELSE IF( lda.LT.max( 1, m ) ) THEN
410 info = -10
411 ELSE IF( ldb.LT.max( 1, p ) ) THEN
412 info = -12
413 ELSE IF( ldu.LT.1 .OR. ( wantu .AND. ldu.LT.m ) ) THEN
414 info = -16
415 ELSE IF( ldv.LT.1 .OR. ( wantv .AND. ldv.LT.p ) ) THEN
416 info = -18
417 ELSE IF( ldq.LT.1 .OR. ( wantq .AND. ldq.LT.n ) ) THEN
418 info = -20
419 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
420 info = -24
421 END IF
422*
423* Compute workspace
424*
425 IF( info.EQ.0 ) THEN
426 CALL sggsvp3( jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola,
427 $ tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, work,
428 $ work, -1, info )
429 lwkopt = n + int( work( 1 ) )
430 lwkopt = max( 2*n, lwkopt )
431 lwkopt = max( 1, lwkopt )
432 work( 1 ) = real( lwkopt )
433 END IF
434*
435 IF( info.NE.0 ) THEN
436 CALL xerbla( 'SGGSVD3', -info )
437 RETURN
438 END IF
439 IF( lquery ) THEN
440 RETURN
441 ENDIF
442*
443* Compute the Frobenius norm of matrices A and B
444*
445 anorm = slange( '1', m, n, a, lda, work )
446 bnorm = slange( '1', p, n, b, ldb, work )
447*
448* Get machine precision and set up threshold for determining
449* the effective numerical rank of the matrices A and B.
450*
451 ulp = slamch( 'Precision' )
452 unfl = slamch( 'Safe Minimum' )
453 tola = max( m, n )*max( anorm, unfl )*ulp
454 tolb = max( p, n )*max( bnorm, unfl )*ulp
455*
456* Preprocessing
457*
458 CALL sggsvp3( jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola,
459 $ tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, work,
460 $ work( n+1 ), lwork-n, info )
461*
462* Compute the GSVD of two upper "triangular" matrices
463*
464 CALL stgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b, ldb,
465 $ tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq,
466 $ work, ncycle, info )
467*
468* Sort the singular values and store the pivot indices in IWORK
469* Copy ALPHA to WORK, then sort ALPHA in WORK
470*
471 CALL scopy( n, alpha, 1, work, 1 )
472 ibnd = min( l, m-k )
473 DO 20 i = 1, ibnd
474*
475* Scan for largest ALPHA(K+I)
476*
477 isub = i
478 smax = work( k+i )
479 DO 10 j = i + 1, ibnd
480 temp = work( k+j )
481 IF( temp.GT.smax ) THEN
482 isub = j
483 smax = temp
484 END IF
485 10 CONTINUE
486 IF( isub.NE.i ) THEN
487 work( k+isub ) = work( k+i )
488 work( k+i ) = smax
489 iwork( k+i ) = k + isub
490 ELSE
491 iwork( k+i ) = k + i
492 END IF
493 20 CONTINUE
494*
495 work( 1 ) = real( lwkopt )
496 RETURN
497*
498* End of SGGSVD3
499*
#define alpha
Definition eval.h:35
subroutine stgsja(jobu, jobv, jobq, m, p, n, k, l, a, lda, b, ldb, tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq, work, ncycle, info)
STGSJA
Definition stgsja.f:378
subroutine sggsvp3(jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola, tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, tau, work, lwork, info)
SGGSVP3
Definition sggsvp3.f:272