ddbtrf_8f_source.html

      SUBROUTINE DDBTRF( M, N, KL, KU, AB, LDAB, INFO )
*
*  -- ScaLAPACK auxiliary routine (version 2.0) --
*     Univ. of Tennessee, Univ. of California Berkeley,
*     Univ. of Colorado Denver
*
*     Written by Andrew J. Cleary, University of Tennessee.
*     August, 1996.
*     Modified from DGBTRF:
*  -- LAPACK routine (preliminary version) --
*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
*     Courant Institute, Argonne National Lab, and Rice University
*     August 6, 1991
*
*     .. Scalar Arguments ..
      INTEGER            INFO, KL, KU, LDAB, M, N
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   AB( LDAB, * )
*     ..
*
*  Purpose
*  =======
*
*  DDBTRF computes an LU factorization of a real m-by-n band matrix A
*  without using partial pivoting or row interchanges.
*
*  This is the blocked version of the algorithm, calling Level 3 BLAS.
*  If the block size obtained from ILAENV (capped at NBMAX) is <= 1 or
*  larger than KL, the unblocked routine DDBTF2 is called instead.
*
*  Arguments
*  =========
*
*  M       (input) INTEGER
*          The number of rows of the matrix A.  M >= 0.
*
*  N       (input) INTEGER
*          The number of columns of the matrix A.  N >= 0.
*
*  KL      (input) INTEGER
*          The number of subdiagonals within the band of A.  KL >= 0.
*
*  KU      (input) INTEGER
*          The number of superdiagonals within the band of A.  KU >= 0.
*
*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
*          On entry, the matrix A in band storage.  The code below
*          takes AB(KU+1,j) as the diagonal element of column j, so
*          the j-th column of A is stored in the j-th column of the
*          array AB as follows:
*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
*
*          On exit, details of the factorization: U is stored as an
*          upper triangular band matrix with KU superdiagonals in
*          rows 1 to KU+1, and the multipliers used during the
*          factorization are stored in rows KU+2 to KL+KU+1.
*          See below for further details.
*
*  LDAB    (input) INTEGER
*          The leading dimension of the array AB.  The argument check
*          only rejects LDAB < MIN( KL+KU+1, M, N ).
*          NOTE(review): the storage scheme above occupies KL+KU+1
*          rows, so callers presumably need LDAB >= KL+KU+1; the
*          weaker test is kept unchanged here - confirm with callers
*          before tightening it.
*
*  INFO    (output) INTEGER
*          = 0: successful exit
*          < 0: if INFO = -i, the i-th argument had an illegal value
*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
*               has been completed, but the factor U is exactly
*               singular, and division by zero will occur if it is used
*               to solve a system of equations.
*
*  Further Details
*  ===============
*
*  The band storage scheme is illustrated by the following example, when
*  M = N = 6, KL = 2, KU = 1:
*
*  On entry:                       On exit:
*
*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
*
*  Array elements marked * are not used by the routine.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0 )
      PARAMETER          ( ZERO = 0.0D+0 )
      INTEGER            NBMAX, LDWORK
      PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP,
     $                   JU, KM, KV, NB, NW
*     ..
*     .. Local Arrays ..
      DOUBLE PRECISION   WORK13( LDWORK, NBMAX ),
     $                   WORK31( LDWORK, NBMAX )
*     ..
*     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ILAENV
*     ..
*     .. External Subroutines ..
      EXTERNAL           DCOPY, DDBTF2, DGEMM, DGER, DSCAL, DTRSM,
     $                   XERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
*     ..
*     .. Executable Statements ..
*
*     KV is the number of superdiagonals in the factor U
*
      KV = KU
*
*     Test the input parameters.
*
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( KL.LT.0 ) THEN
         INFO = -3
      ELSE IF( KU.LT.0 ) THEN
         INFO = -4
      ELSE IF( LDAB.LT.MIN( MIN( KL+KV+1, M ), N ) ) THEN
         INFO = -6
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DDBTRF', -INFO )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( M.EQ.0 .OR. N.EQ.0 )
     $   RETURN
*
*     Determine the block size for this environment
*
      NB = ILAENV( 1, 'ddbtrf', ' ', M, N, KL, KU )
*
*     The block size must not exceed the limit set by the size of the
*     local arrays WORK13 and WORK31.
*
      NB = MIN( NB, NBMAX )
*
      IF( NB.LE.1 .OR. NB.GT.KL ) THEN
*
*        Use unblocked code
*
         CALL DDBTF2( M, N, KL, KU, AB, LDAB, INFO )
      ELSE
*
*        Use blocked code
*
*        Zero the superdiagonal elements of the work array WORK13
*
         DO 20 J = 1, NB
            DO 10 I = 1, J - 1
               WORK13( I, J ) = ZERO
   10       CONTINUE
   20    CONTINUE
*
*        Zero the subdiagonal elements of the work array WORK31
*
         DO 40 J = 1, NB
            DO 30 I = J + 1, NB
               WORK31( I, J ) = ZERO
   30       CONTINUE
   40    CONTINUE
*
*        JU is the index of the last column affected by the current
*        stage of the factorization
*
         JU = 1
*
         DO 180 J = 1, MIN( M, N ), NB
            JB = MIN( NB, MIN( M, N )-J+1 )
*
*           The active part of the matrix is partitioned
*
*              A11   A12   A13
*              A21   A22   A23
*              A31   A32   A33
*
*           Here A11, A21 and A31 denote the current block of JB columns
*           which is about to be factorized. The number of rows in the
*           partitioning are JB, I2, I3 respectively, and the numbers
*           of columns are JB, J2, J3. The superdiagonal elements of A13
*           and the subdiagonal elements of A31 lie outside the band.
*
            I2 = MIN( KL-JB, M-J-JB+1 )
            I3 = MIN( JB, M-J-KL+1 )
*
*           J2 and J3 are computed after JU has been updated.
*
*           Factorize the current block of JB columns
*
            DO 80 JJ = J, J + JB - 1
*
*              Test the diagonal element for singularity (no pivot
*              search is done, so JP is always 1). KM is the number of
*              subdiagonal elements in the current column.
*
               KM = MIN( KL, M-JJ )
               JP = 1
               IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
                  JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
*
*                 Compute multipliers
*
                  CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
     $                        1 )
*
*                 Update trailing submatrix within the band and within
*                 the current block. JM is the index of the last column
*                 which needs to be updated.
*
                  JM = MIN( JU, J+JB-1 )
                  IF( JM.GT.JJ ) THEN
                     CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
     $                          AB( KV, JJ+1 ), LDAB-1,
     $                          AB( KV+1, JJ+1 ), LDAB-1 )
                  END IF
               ELSE
*
*                 The diagonal element is exactly zero: report the
*                 index of the first such column through INFO, as
*                 documented for INFO > 0. The column is left unscaled.
*
                  IF( INFO.EQ.0 )
     $               INFO = JJ
               END IF
*
*              Copy current column of A31 into the work array WORK31
*
               NW = MIN( JJ-J+1, I3 )
               IF( NW.GT.0 )
     $            CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
     $                        WORK31( 1, JJ-J+1 ), 1 )
   80       CONTINUE
            IF( J+JB.LE.N ) THEN
*
*              Columns remain to the right of the current block.
*              J2 and J3 are the column counts of the A12/A22/A32 and
*              A13/A23/A33 parts (no row interchanges are applied).
*
               J2 = MIN( JU-J+1, KV ) - JB
               J3 = MAX( 0, JU-J-KV+1 )
*
*              Update the relevant part of the trailing submatrix
*
               IF( J2.GT.0 ) THEN
*
*                 Update A12
*
                  CALL DTRSM( 'left', 'lower', 'no transpose', 'unit',
     $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
     $                        AB( KV+1-JB, J+JB ), LDAB-1 )
*
                  IF( I2.GT.0 ) THEN
*
*                    Update A22
*
                     CALL DGEMM( 'no transpose', 'no transpose', I2, J2,
     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
     $                           AB( KV+1, J+JB ), LDAB-1 )
                  END IF
*
                  IF( I3.GT.0 ) THEN
*
*                    Update A32
*
                     CALL DGEMM( 'no transpose', 'no transpose', I3, J2,
     $                           JB, -ONE, WORK31, LDWORK,
     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
     $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
                  END IF
               END IF
*
               IF( J3.GT.0 ) THEN
*
*                 Copy the lower triangle of A13 into the work array
*                 WORK13
*
                  DO 130 JJ = 1, J3
                     DO 120 II = JJ, JB
                        WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
  120                CONTINUE
  130             CONTINUE
*
*                 Update A13 in the work array
*
                  CALL DTRSM( 'left', 'lower', 'no transpose', 'unit',
     $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
     $                        WORK13, LDWORK )
*
                  IF( I2.GT.0 ) THEN
*
*                    Update A23
*
                     CALL DGEMM( 'no transpose', 'no transpose', I2, J3,
     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
     $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
     $                           LDAB-1 )
                  END IF
*
                  IF( I3.GT.0 ) THEN
*
*                    Update A33
*
                     CALL DGEMM( 'no transpose', 'no transpose', I3, J3,
     $                           JB, -ONE, WORK31, LDWORK, WORK13,
     $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
                  END IF
*
*                 Copy the lower triangle of A13 back into place
*
                  DO 150 JJ = 1, J3
                     DO 140 II = JJ, JB
                        AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
  140                CONTINUE
  150             CONTINUE
               END IF
            END IF
*
*           Copy the upper triangle of A31 back into place
*
            DO 170 JJ = J + JB - 1, J, -1
*
*              Copy the current column of A31 back into place
*
               NW = MIN( I3, JJ-J+1 )
               IF( NW.GT.0 )
     $            CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
     $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
  170       CONTINUE
  180    CONTINUE
      END IF
*
      RETURN
*
*     End of DDBTRF
*
      END

ddbtf2
subroutine ddbtf2(m, n, kl, ku, ab, ldab, info)
Definition ddbtf2.f:2

ddbtrf
subroutine ddbtrf(m, n, kl, ku, ab, ldab, info)
Definition ddbtrf.f:2

xerbla
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60

dscal
subroutine dscal(n, da, dx, incx)
DSCAL
Definition dscal.f:79

dswap
subroutine dswap(n, dx, incx, dy, incy)
DSWAP
Definition dswap.f:82

dcopy
subroutine dcopy(n, dx, incx, dy, incy)
DCOPY
Definition dcopy.f:82

dger
subroutine dger(m, n, alpha, x, incx, y, incy, a, lda)
DGER
Definition dger.f:130

dgemm
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187

dtrsm
subroutine dtrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
DTRSM
Definition dtrsm.f:181

min
#define min(a, b)
Definition macros.h:20

max
#define max(a, b)
Definition macros.h:21