VARIANTS_2lu_2REC_2sgetrf_8f_source.html

C> \brief \b SGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*  Definition:

*  ===========

*

*       SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO )

*

*       .. Scalar Arguments ..

*       INTEGER            INFO, LDA, M, N

*       ..

*       .. Array Arguments ..

*       INTEGER            IPIV( * )

*       REAL               A( LDA, * )

*       ..

*

*  Purpose

*  =======

*

C>\details \b Purpose:

C>\verbatim

C>

C> SGETRF computes an LU factorization of a general M-by-N matrix A

C> using partial pivoting with row interchanges.

C>

C> The factorization has the form

C>    A = P * L * U

C> where P is a permutation matrix, L is lower triangular with unit

C> diagonal elements (lower trapezoidal if m > n), and U is upper

C> triangular (upper trapezoidal if m < n).

C>

C> This code implements an iterative version of Sivan Toledo's recursive

C> LU algorithm[1].  For square matrices, this iterative version should

C> be within a factor of two of the optimum number of memory transfers.

C>

C> The pattern is as follows, with the large blocks of U being updated

C> in one call to STRSM, and the dotted lines denoting sections that

C> have had all pending permutations applied:

C>

C>  1 2 3 4 5 6 7 8

C> +-+-+---+-------+------

C> | |1|   |       |

C> |.+-+ 2 |       |

C> | | |   |       |

C> |.|.+-+-+   4   |

C> | | | |1|       |

C> | | |.+-+       |

C> | | | | |       |

C> |.|.|.|.+-+-+---+  8

C> | | | | | |1|   |

C> | | | | |.+-+ 2 |

C> | | | | | | |   |

C> | | | | |.|.+-+-+

C> | | | | | | | |1|

C> | | | | | | |.+-+

C> | | | | | | | | |

C> |.|.|.|.|.|.|.|.+-----

C> | | | | | | | | |

C>

C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in

C> the binary expansion of the current column.  Each Schur update is

C> applied as soon as the necessary portion of U is available.

C>

C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with

C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),

C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744

C>

C>\endverbatim

*

*  Arguments:

*  ==========

*

C> \param[in] M

C> \verbatim

C>          M is INTEGER

C>          The number of rows of the matrix A.  M >= 0.

C> \endverbatim

C>

C> \param[in] N

C> \verbatim

C>          N is INTEGER

C>          The number of columns of the matrix A.  N >= 0.

C> \endverbatim

C>

C> \param[in,out] A

C> \verbatim

C>          A is REAL array, dimension (LDA,N)

C>          On entry, the M-by-N matrix to be factored.

C>          On exit, the factors L and U from the factorization

C>          A = P*L*U; the unit diagonal elements of L are not stored.

C> \endverbatim

C>

C> \param[in] LDA

C> \verbatim

C>          LDA is INTEGER

C>          The leading dimension of the array A.  LDA >= max(1,M).

C> \endverbatim

C>

C> \param[out] IPIV

C> \verbatim

C>          IPIV is INTEGER array, dimension (min(M,N))

C>          The pivot indices; for 1 <= i <= min(M,N), row i of the

C>          matrix was interchanged with row IPIV(i).

C> \endverbatim

C>

C> \param[out] INFO

C> \verbatim

C>          INFO is INTEGER

C>          = 0:  successful exit

C>          < 0:  if INFO = -i, the i-th argument had an illegal value

C>          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization

C>                has been completed, but the factor U is exactly

C>                singular, and division by zero will occur if it is used

C>                to solve a system of equations.

C> \endverbatim

C>

*

*  Authors:

*  ========

*

C> \author Univ. of Tennessee

C> \author Univ. of California Berkeley

C> \author Univ. of Colorado Denver

C> \author NAG Ltd.

*

C> \date December 2016

*

C> \ingroup variantsGEcomputational

*

*  =====================================================================

      SUBROUTINE sgetrf( M, N, A, LDA, IPIV, INFO )
*
*  -- LAPACK computational routine (version 3.X) --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.--
*
*     Purpose: overwrite the M-by-N matrix A with the factors of
*     A = P*L*U (partial pivoting with row interchanges), processing
*     one column per iteration.  After column J has been factored, the
*     KAHEAD = IAND(J,-J) columns of L ending at J are used to update
*     the next KCOLS columns to the right: STRSM forms the U block and
*     SGEMM applies the Schur complement update below it.
*
*     M    (in)     number of rows of A; M < 0 gives INFO = -1.
*     N    (in)     number of columns of A; N < 0 gives INFO = -2.
*     A    (in/out) REAL array A(LDA,*), factored in place.
*     LDA  (in)     leading dimension; LDA < max(1,M) gives INFO = -4.
*     IPIV (out)    IPIV(J) is the row swapped with row J at step J,
*                   for J = 1..min(M,N).
*     INFO (out)    0 on success, -i for an illegal i-th argument
*                   (XERBLA is called), or the index J of the first
*                   column whose pivot A(J,J) is exactly zero.
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            IPIV( * )
      REAL               A( LDA, * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               ONE, ZERO, NEGONE
      parameter( one = 1.0e+0, zero = 0.0e+0 )
      parameter( negone = -1.0e+0 )
*     ..
*     .. Local Scalars ..
      REAL               SFMIN, TMP
      INTEGER            I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
      INTEGER            KSTART, IPIVSTART, JPIVSTART, KCOLS
*     ..
*     .. External Functions ..
      REAL               SLAMCH
      INTEGER            ISAMAX
      LOGICAL            SISNAN
      EXTERNAL           slamch, isamax, sisnan
*     ..
*     .. External Subroutines ..
      EXTERNAL           sgemm, strsm, sscal, xerbla, slaswp
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, max, min, iand
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      info = 0
      IF( m.LT.0 ) THEN
         info = -1
      ELSE IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( lda.LT.max( 1, m ) ) THEN
         info = -4
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'SGETRF', -info )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( m.EQ.0 .OR. n.EQ.0 )
     $   RETURN
*
*     Compute machine safe minimum
*
      sfmin = slamch( 'S' )
*
      nstep = min( m, n )
      DO j = 1, nstep
*
*        KAHEAD isolates the lowest set bit of J (the 1-2-1-4-...
*        pattern); columns KSTART..J of L drive this step's update.
*
         kahead = iand( j, -j )
         kstart = j + 1 - kahead
*
*        Update at most KAHEAD columns and never go past column
*        NSTEP = min(M,N).  Bounding by NSTEP-J instead of M-J keeps
*        the SLASWP/STRSM/SGEMM calls below inside the N columns of A
*        when M > N.  When M <= N the two bounds are identical, and
*        columns M+1..N are processed after the loop.
*
         kcols = min( kahead, nstep-j )
*
*        Find pivot.
*
         jp = j - 1 + isamax( m-j+1, a( j, j ), 1 )
         ipiv( j ) = jp

!        Permute just this column.
         IF (jp .NE. j) THEN
            tmp = a( j, j )
            a( j, j ) = a( jp, j )
            a( jp, j ) = tmp
         END IF

!        Apply pending permutations to L.  Each pass applies the row
!        interchanges IPIVSTART..J to the NTOPIV columns starting at
!        JPIVSTART, then doubles the block width and moves left.  The
!        loop body is skipped when KAHEAD = 1, so A(1,J-1) is never
!        referenced for J = 1.
         ntopiv = 1
         ipivstart = j
         jpivstart = j - ntopiv
         DO WHILE ( ntopiv .LT. kahead )
            CALL slaswp( ntopiv, a( 1, jpivstart ), lda, ipivstart, j,
     $           ipiv, 1 )
            ipivstart = ipivstart - ntopiv
            ntopiv = ntopiv * 2
            jpivstart = jpivstart - ntopiv
         END DO

!        Permute U block to match L
         CALL slaswp( kcols, a( 1,j+1 ), lda, kstart, j, ipiv, 1 )

!        Factor the current column.  A NaN pivot is neither scaled
!        nor reported; only an exactly zero pivot sets INFO, and only
!        the first such column is recorded.
         IF( a( j, j ).NE.zero .AND. .NOT.sisnan( a( j, j ) ) ) THEN
               IF( abs(a( j, j )) .GE. sfmin ) THEN
                  CALL sscal( m-j, one / a( j, j ), a( j+1, j ), 1 )
               ELSE
!                Pivot magnitude is below SFMIN: divide element by
!                element instead of scaling by the reciprocal.
                 DO i = 1, m-j
                    a( j+i, j ) = a( j+i, j ) / a( j, j )
                 END DO
               END IF
         ELSE IF( a( j,j ) .EQ. zero .AND. info .EQ. 0 ) THEN
            info = j
         END IF

!        Solve for U block.
         CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit', kahead,
     $        kcols, one, a( kstart, kstart ), lda,
     $        a( kstart, j+1 ), lda )
!        Schur complement.
         CALL sgemm( 'No transpose', 'No transpose', m-j,
     $        kcols, kahead, negone, a( j+1, kstart ), lda,
     $        a( kstart, j+1 ), lda, one, a( j+1, j+1 ), lda )
      END DO

!     Handle pivot permutations on the way out of the recursion:
!     walking left from column NSTEP-NPIVED, apply interchanges
!     J+1..NSTEP to the NTOPIV columns ending at column J.
      npived = iand( nstep, -nstep )
      j = nstep - npived
      DO WHILE ( j .GT. 0 )
         ntopiv = iand( j, -j )
         CALL slaswp( ntopiv, a( 1, j-ntopiv+1 ), lda, j+1, nstep,
     $        ipiv, 1 )
         j = j - ntopiv
      END DO

!     If short and wide, handle the rest of the columns.  The loop
!     above never touches columns beyond M (KCOLS <= NSTEP-J), so
!     the remaining N-M columns start at column M+1.
      IF ( m .LT. n ) THEN
         CALL slaswp( n-m, a( 1, m+1 ), lda, 1, m, ipiv, 1 )
         CALL strsm( 'Left', 'Lower', 'No transpose', 'Unit', m,
     $        n-m, one, a, lda, a( 1, m+1 ), lda )
      END IF

      RETURN
*
*     End of SGETRF
*
      END

xerbla
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60

sgetrf
subroutine sgetrf(m, n, a, lda, ipiv, info)
SGETRF
Definition sgetrf.f:108

slaswp
subroutine slaswp(n, a, lda, k1, k2, ipiv, incx)
SLASWP performs a series of row interchanges on a general rectangular matrix.
Definition slaswp.f:115

sscal
subroutine sscal(n, sa, sx, incx)
SSCAL
Definition sscal.f:79

strsm
subroutine strsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRSM
Definition strsm.f:181

sgemm
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:187

min
#define min(a, b)
Definition macros.h:20

max
#define max(a, b)
Definition macros.h:21