VARIANTS_2qr_2LL_2sgeqrf_8f_source.html

C> \brief \b SGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*  Definition:

*  ===========

*

*       SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )

*

*       .. Scalar Arguments ..

*       INTEGER            INFO, LDA, LWORK, M, N

*       ..

*       .. Array Arguments ..

*       REAL               A( LDA, * ), TAU( * ), WORK( * )

*       ..

*

*  Purpose

*  =======

*

C>\details \b Purpose:

C>\verbatim

C>

C> SGEQRF computes a QR factorization of a real M-by-N matrix A:

C> A = Q * R.

C>

C> This is the left-looking Level 3 BLAS version of the algorithm.

C>

C>\endverbatim

*

*  Arguments:

*  ==========

*

C> \param[in] M

C> \verbatim

C>          M is INTEGER

C>          The number of rows of the matrix A.  M >= 0.

C> \endverbatim

C>

C> \param[in] N

C> \verbatim

C>          N is INTEGER

C>          The number of columns of the matrix A.  N >= 0.

C> \endverbatim

C>

C> \param[in,out] A

C> \verbatim

C>          A is REAL array, dimension (LDA,N)

C>          On entry, the M-by-N matrix A.

C>          On exit, the elements on and above the diagonal of the array

C>          contain the min(M,N)-by-N upper trapezoidal matrix R (R is

C>          upper triangular if m >= n); the elements below the diagonal,

C>          with the array TAU, represent the orthogonal matrix Q as a

C>          product of min(m,n) elementary reflectors (see Further

C>          Details).

C> \endverbatim

C>

C> \param[in] LDA

C> \verbatim

C>          LDA is INTEGER

C>          The leading dimension of the array A.  LDA >= max(1,M).

C> \endverbatim

C>

C> \param[out] TAU

C> \verbatim

C>          TAU is REAL array, dimension (min(M,N))

C>          The scalar factors of the elementary reflectors (see Further

C>          Details).

C> \endverbatim

C>

C> \param[out] WORK

C> \verbatim

C>          WORK is REAL array, dimension (MAX(1,LWORK))

C>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

C> \endverbatim

C>

C> \param[in] LWORK

C> \verbatim

C>          LWORK is INTEGER

C> \endverbatim

C> \verbatim

C>          The dimension of the array WORK.  LWORK >= max(1,N).
C>          The dimension can be divided into three parts.

C> \endverbatim

C> \verbatim

C>          1) The part for the triangular factor T. If the very last T is not bigger

C>             than any of the rest, then this part is NB x ceiling(K/NB), otherwise,

C>             NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T

C> \endverbatim

C> \verbatim

C>          2) The part for the very last T when T is bigger than any of the rest T.

C>             The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,

C>             where K = min(M,N), NX is calculated by

C>                   NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) )

C> \endverbatim

C> \verbatim

C>          3) The part for slarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)

C> \endverbatim

C> \verbatim

C>          So LWORK = part1 + part2 + part3

C> \endverbatim

C> \verbatim

C>          If LWORK = -1, then a workspace query is assumed; the routine

C>          only calculates the optimal size of the WORK array, returns

C>          this value as the first entry of the WORK array, and no error

C>          message related to LWORK is issued by XERBLA.

C> \endverbatim

C>

C> \param[out] INFO

C> \verbatim

C>          INFO is INTEGER

C>          = 0:  successful exit

C>          < 0:  if INFO = -i, the i-th argument had an illegal value

C> \endverbatim

C>

*

*  Authors:

*  ========

*

C> \author Univ. of Tennessee

C> \author Univ. of California Berkeley

C> \author Univ. of Colorado Denver

C> \author NAG Ltd.

*

C> \date December 2016

*

C> \ingroup variantsGEcomputational

*

*  Further Details

*  ===============

C>\details \b Further \b Details

C> \verbatim

C>

C>  The matrix Q is represented as a product of elementary reflectors

C>

C>     Q = H(1) H(2) . . . H(k), where k = min(m,n).

C>

C>  Each H(i) has the form

C>

C>     H(i) = I - tau * v * v'

C>

C>  where tau is a real scalar, and v is a real vector with

C>  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),

C>  and tau in TAU(i).

C>

C> \endverbatim

C>

*  =====================================================================


      SUBROUTINE sgeqrf ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.--
*
*     Left-looking blocked QR factorization of the M-by-N matrix A.
*     Each block of NB columns is first updated with the block
*     reflectors computed so far (SLARFB), then factored with the
*     unblocked SGEQR2; its triangular factor T is formed by SLARFT
*     (when further columns remain) and kept in WORK for later use.
*
*     Layout of WORK as used by the calls below:
*       WORK(J), leading dimension LBWORK
*          T of the block that starts at column J
*       WORK(LBWORK*NB+1), leading dimension NT
*          T of the last block when NT > NB
*       WORK(LBWORK*NB+NT*NT+1)
*          scratch space handed to SLARFB and SGEQR2
*
*     .. Scalar Arguments ..
      INTEGER            INFO, LDA, LWORK, M, N
*     ..
*     .. Array Arguments ..
      REAL               A( LDA, * ), TAU( * ), WORK( * )
*     ..
*
*  =====================================================================
*
*     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            I, IB, IINFO, IWS, J, K, LWKOPT, NB,
     $                   NBMIN, NX, LBWORK, NT, LLWORK
*     ..
*     .. External Subroutines ..
      EXTERNAL           sgeqr2, slarfb, slarft, xerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min
*     ..
*     .. External Functions ..
      INTEGER            ILAENV
      REAL               SCEIL
      EXTERNAL           ilaenv, sceil
*     ..
*     .. Executable Statements ..
*
*     Defaults: IWS = N is the workspace handed to the unblocked code
*     and is what WORK(1) reports on a normal exit unless the blocked
*     path overrides it below.
*
      info = 0
      nbmin = 2
      nx = 0
      iws = n
      k = min( m, n )
      nb = ilaenv( 1, 'SGEQRF', ' ', m, n, -1, -1 )
*
      IF( nb.GT.1 .AND. nb.LT.k ) THEN
*
*        Determine when to cross over from blocked to unblocked code.
*
         nx = max( 0, ilaenv( 3, 'SGEQRF', ' ', m, n, -1, -1 ) )
      END IF
*
*     Get NT, the size of the very last T, i.e. the number of columns
*     left over between the last full block boundary at or after K-NX
*     and K, e.g.:
*
*            NB=3     2NB=6       K=10
*            |        |           |
*      1--2--3--4--5--6--7--8--9--10
*                  |     \________/
*               K-NX=5      NT=4
*
*     So here 4 x 4 is the last T stored in the workspace
*
      nt = k-sceil(real(k-nx)/real(nb))*nb
*
*     optimal workspace = space for SLARFB + space for normal T's
*                         + space for the last T
*
*     LLWORK is the SLARFB scratch size expressed in units of NB
*     (rounded up); it is multiplied by NB again below.
*
      llwork = max(max((n-m)*k, (n-m)*nb), max(k*nb, nb*nb))
      llwork = sceil(real(llwork)/real(nb))
*
      IF ( nt.GT.nb ) THEN
*
*         The last T is larger than NB and gets its own NT x NT area.
*
          lbwork = k-nt
          lwkopt = (lbwork+llwork)*nb + nt*nt
*
      ELSE
*
          lbwork = sceil(real(k)/real(nb))*nb
          lwkopt = (lbwork+llwork-nb)*nb
*
      END IF
*
*     Never report less than the minimum LWORK = max(1,N) accepted by
*     the argument check below.  Without this clamp the formula above
*     can yield a smaller value (e.g. 0 when N = 0), so a caller that
*     allocates exactly the queried size would be rejected with
*     INFO = -7.
*
      lwkopt = max( lwkopt, max( 1, n ) )
      work( 1 ) = lwkopt
*
*     Test the input arguments
*
      lquery = ( lwork.EQ.-1 )
      IF( m.LT.0 ) THEN
         info = -1
      ELSE IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( lda.LT.max( 1, m ) ) THEN
         info = -4
      ELSE IF( lwork.LT.max( 1, n ) .AND. .NOT.lquery ) THEN
         info = -7
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'SGEQRF', -info )
         RETURN
      ELSE IF( lquery ) THEN
*
*        Workspace query: WORK(1) already holds the size.
*
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( k.EQ.0 ) THEN
         work( 1 ) = 1
         RETURN
      END IF
*
      IF( nb.GT.1 .AND. nb.LT.k ) THEN
*
         IF( nx.LT.k ) THEN
*
*           Determine if workspace is large enough for blocked code.
*
            IF ( nt.LE.nb ) THEN
                iws = (lbwork+llwork-nb)*nb
            ELSE
                iws = (lbwork+llwork)*nb+nt*nt
            END IF
*
            IF( lwork.LT.iws ) THEN
*
*              Not enough workspace to use optimal NB:  reduce NB and
*              determine the minimum value of NB.
*
               IF ( nt.LE.nb ) THEN
                    nb = lwork / (llwork+(lbwork-nb))
               ELSE
                    nb = (lwork-nt*nt)/(lbwork+llwork)
               END IF
*
               nbmin = max( 2, ilaenv( 2, 'SGEQRF', ' ', m, n, -1,
     $                 -1 ) )
            END IF
         END IF
      END IF
*
      IF( nb.GE.nbmin .AND. nb.LT.k .AND. nx.LT.k ) THEN
*
*        Use blocked code initially
*
         DO 10 i = 1, k - nx, nb
            ib = min( k-i+1, nb )
*
*           Update the current column using old T's
*
            DO 20 j = 1, i - nb, nb
*
*              Apply H' to A(J:M,I:I+IB-1) from the left
*
               CALL slarfb( 'Left', 'Transpose', 'Forward',
     $                      'Columnwise', m-j+1, ib, nb,
     $                      a( j, j ), lda, work(j), lbwork,
     $                      a( j, i ), lda, work(lbwork*nb+nt*nt+1),
     $                      ib)
*
20          CONTINUE
*
*           Compute the QR factorization of the current block
*           A(I:M,I:I+IB-1)
*
            CALL sgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ),
     $                        work(lbwork*nb+nt*nt+1), iinfo )
*
            IF( i+ib.LE.n ) THEN
*
*              Form the triangular factor of the block reflector
*              H = H(i) H(i+1) . . . H(i+ib-1)
*
               CALL slarft( 'Forward', 'Columnwise', m-i+1, ib,
     $                      a( i, i ), lda, tau( i ),
     $                      work(i), lbwork )
*
            END IF
   10    CONTINUE
      ELSE
         i = 1
      END IF
*
*     Use unblocked code to factor the last or only block.
*     After loop 10 the index I holds the loop's terminal value, i.e.
*     the first column the blocked loop did not process.
*
      IF( i.LE.k ) THEN
*
         IF ( i .NE. 1 )   THEN
*
             DO 30 j = 1, i - nb, nb
*
*                Apply H' to A(J:M,I:K) from the left
*
                 CALL slarfb( 'Left', 'Transpose', 'Forward',
     $                       'Columnwise', m-j+1, k-i+1, nb,
     $                       a( j, j ), lda, work(j), lbwork,
     $                       a( j, i ), lda, work(lbwork*nb+nt*nt+1),
     $                       k-i+1)
30           CONTINUE
*
             CALL sgeqr2( m-i+1, k-i+1, a( i, i ), lda, tau( i ),
     $                   work(lbwork*nb+nt*nt+1),iinfo )
*
         ELSE
*
*            No blocked step was taken: factor the whole matrix with
*            the unblocked code, using WORK from its start.
*
             CALL sgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ),
     $               work,iinfo )
*
         END IF
      END IF
*
*     Apply update to the column M+1:N when N > M
*
      IF ( m.LT.n .AND. i.NE.1) THEN
*
*         Form the last triangular factor of the block reflector
*         H = H(i) H(i+1) . . . H(i+ib-1)
*
          IF ( nt .LE. nb ) THEN
               CALL slarft( 'Forward', 'Columnwise', m-i+1, k-i+1,
     $                     a( i, i ), lda, tau( i ), work(i), lbwork )
          ELSE
               CALL slarft( 'Forward', 'Columnwise', m-i+1, k-i+1,
     $                     a( i, i ), lda, tau( i ),
     $                     work(lbwork*nb+1), nt )
          END IF
*
*         Apply H' to A(1:M,M+1:N) from the left
*
          DO 40 j = 1, k-nx, nb
*
               ib = min( k-j+1, nb )
*
               CALL slarfb( 'Left', 'Transpose', 'Forward',
     $                     'Columnwise', m-j+1, n-m, ib,
     $                     a( j, j ), lda, work(j), lbwork,
     $                     a( j, m+1 ), lda, work(lbwork*nb+nt*nt+1),
     $                     n-m)
*
40       CONTINUE
*
*        J now holds the terminal value of loop 40, the first column
*        of the last block; apply that block with the T formed above.
*
         IF ( nt.LE.nb ) THEN
             CALL slarfb( 'Left', 'Transpose', 'Forward',
     $                   'Columnwise', m-j+1, n-m, k-j+1,
     $                   a( j, j ), lda, work(j), lbwork,
     $                   a( j, m+1 ), lda, work(lbwork*nb+nt*nt+1),
     $                   n-m)
         ELSE
             CALL slarfb( 'Left', 'Transpose', 'Forward',
     $                   'Columnwise', m-j+1, n-m, k-j+1,
     $                   a( j, j ), lda,
     $                   work(lbwork*nb+1),
     $                   nt, a( j, m+1 ), lda, work(lbwork*nb+nt*nt+1),
     $                   n-m)
         END IF
*
      END IF
*
      work( 1 ) = iws
      RETURN
*
*     End of SGEQRF
*
      END

ilaenv
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162

xerbla
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60

sgeqr2
subroutine sgeqr2(m, n, a, lda, tau, work, info)
SGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm.
Definition sgeqr2.f:130

slarft
subroutine slarft(direct, storev, n, k, v, ldv, tau, t, ldt)
SLARFT forms the triangular factor T of a block reflector H = I - vtvH
Definition slarft.f:163

slarfb
subroutine slarfb(side, trans, direct, storev, m, n, k, v, ldv, t, ldt, c, ldc, work, ldwork)
SLARFB applies a block reflector or its transpose to a general rectangular matrix.
Definition slarfb.f:197

sgeqrf
subroutine sgeqrf(m, n, a, lda, tau, work, lwork, info)
SGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
Definition sgeqrf.f:151

min
#define min(a, b)
Definition macros.h:20

max
#define max(a, b)
Definition macros.h:21