OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
ddbtrf.f
Go to the documentation of this file.
1 SUBROUTINE ddbtrf( M, N, KL, KU, AB, LDAB, INFO )
2*
3* -- ScaLAPACK auxiliary routine (version 2.0) --
4* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
5*
6* Written by Andrew J. Cleary, University of Tennessee.
7* August, 1996.
8* Modified from DGBTRF:
9* -- LAPACK routine (preliminary version) --
10* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11* Courant Institute, Argonne National Lab, and Rice University
12* August 6, 1991
13*
14* .. Scalar Arguments ..
15 INTEGER INFO, KL, KU, LDAB, M, N
16* ..
17* .. Array Arguments ..
18 DOUBLE PRECISION AB( LDAB, * )
19* ..
20*
21* Purpose
22* =======
23*
24* Ddbtrf computes an LU factorization of a real m-by-n band matrix A
25* without using partial pivoting or row interchanges.
26*
27* This is the blocked version of the algorithm, calling Level 3 BLAS.
28*
29* Arguments
30* =========
31*
32* M (input) INTEGER
33* The number of rows of the matrix A. M >= 0.
34*
35* N (input) INTEGER
36* The number of columns of the matrix A. N >= 0.
37*
38* KL (input) INTEGER
39* The number of subdiagonals within the band of A. KL >= 0.
40*
41* KU (input) INTEGER
42* The number of superdiagonals within the band of A. KU >= 0.
43*
44* AB (input/output) REAL array, dimension (LDAB,N)
45* On entry, the matrix A in band storage, in rows KL+1 to
46* 2*KL+KU+1; rows 1 to KL of the array need not be set.
47* The j-th column of A is stored in the j-th column of the
48* array AB as follows:
49* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
50*
51* On exit, details of the factorization: U is stored as an
52* upper triangular band matrix with KL+KU superdiagonals in
53* rows 1 to KL+KU+1, and the multipliers used during the
54* factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
55* See below for further details.
56*
57* LDAB (input) INTEGER
58* The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
59*
60* INFO (output) INTEGER
61* = 0: successful exit
62* < 0: if INFO = -i, the i-th argument had an illegal value
63* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
64* has been completed, but the factor U is exactly
65* singular, and division by zero will occur if it is used
66* to solve a system of equations.
67*
68* Further Details
69* ===============
70*
71* The band storage scheme is illustrated by the following example, when
72* M = N = 6, KL = 2, KU = 1:
73*
74* On entry: On exit:
75*
76* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
77* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
78* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
79* a31 a42 a53 a64 * * m31 m42 m53 m64 * *
80*
81* Array elements marked * are not used by the routine.
82*
83* =====================================================================
84*
85* .. Parameters ..
86 DOUBLE PRECISION ONE, ZERO
87 parameter( one = 1.0d+0 )
88 parameter( zero = 0.0d+0 )
89 INTEGER NBMAX, LDWORK
90 parameter( nbmax = 64, ldwork = nbmax+1 )
91* ..
92* .. Local Scalars ..
93 INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP,
94 $ JU, KM, KV, NB, NW
95* ..
96* .. Local Arrays ..
97 DOUBLE PRECISION WORK13( LDWORK, NBMAX ),
98 $ WORK31( LDWORK, NBMAX )
99* ..
100* .. External Functions ..
101 INTEGER ILAENV, ISAMAX
102 EXTERNAL ilaenv, isamax
103* ..
104* .. External Subroutines ..
105 EXTERNAL dcopy, ddbtf2, dgemm, dger, dscal,
106 $ dswap, dtrsm, xerbla
107* ..
108* .. Intrinsic Functions ..
109 INTRINSIC max, min
110* ..
111* .. Executable Statements ..
112*
113* KV is the number of superdiagonals in the factor U
114*
115 kv = ku
116*
117* Test the input parameters.
118*
119 info = 0
120 IF( m.LT.0 ) THEN
121 info = -1
122 ELSE IF( n.LT.0 ) THEN
123 info = -2
124 ELSE IF( kl.LT.0 ) THEN
125 info = -3
126 ELSE IF( ku.LT.0 ) THEN
127 info = -4
128 ELSE IF( ldab.LT.min( min( kl+kv+1,m ),n ) ) THEN
129 info = -6
130 END IF
131 IF( info.NE.0 ) THEN
132 CALL xerbla( 'DDBTRF', -info )
133 RETURN
134 END IF
135*
136* Quick return if possible
137*
138 IF( m.EQ.0 .OR. n.EQ.0 )
139 $ RETURN
140*
141* Determine the block size for this environment
142*
143 nb = ilaenv( 1, 'ddbtrf', ' ', M, N, KL, KU )
144*
145* The block size must not exceed the limit set by the size of the
146* local arrays WORK13 and WORK31.
147*
148 NB = MIN( NB, NBMAX )
149*
150.LE..OR..GT. IF( NB1 NBKL ) THEN
151*
152* Use unblocked code
153*
154 CALL DDBTF2( M, N, KL, KU, AB, LDAB, INFO )
155 ELSE
156*
157* Use blocked code
158*
159* Zero the superdiagonal elements of the work array WORK13
160*
161 DO 20 J = 1, NB
162 DO 10 I = 1, J - 1
163 WORK13( I, J ) = ZERO
164 10 CONTINUE
165 20 CONTINUE
166*
167* Zero the subdiagonal elements of the work array WORK31
168*
169 DO 40 J = 1, NB
170 DO 30 I = J + 1, NB
171 WORK31( I, J ) = ZERO
172 30 CONTINUE
173 40 CONTINUE
174*
175* JU is the index of the last column affected by the current
176* stage of the factorization
177*
178 JU = 1
179*
180 DO 180 J = 1, MIN( M, N ), NB
181 JB = MIN( NB, MIN( M, N )-J+1 )
182*
183* The active part of the matrix is partitioned
184*
185* A11 A12 A13
186* A21 A22 A23
187* A31 A32 A33
188*
189* Here A11, A21 and A31 denote the current block of JB columns
190* which is about to be factorized. The number of rows in the
191* partitioning are JB, I2, I3 respectively, and the numbers
192* of columns are JB, J2, J3. The superdiagonal elements of A13
193* and the subdiagonal elements of A31 lie outside the band.
194*
195 I2 = MIN( KL-JB, M-J-JB+1 )
196 I3 = MIN( JB, M-J-KL+1 )
197*
198* J2 and J3 are computed after JU has been updated.
199*
200* Factorize the current block of JB columns
201*
202 DO 80 JJ = J, J + JB - 1
203*
204* Find pivot and test for singularity. KM is the number of
205* subdiagonal elements in the current column.
206*
207 KM = MIN( KL, M-JJ )
208 JP = 1
209.NE. IF( AB( KV+JP, JJ )ZERO ) THEN
210 JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
211*
212* Compute multipliers
213*
214 CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
215 $ 1 )
216*
217* Update trailing submatrix within the band and within
218* the current block. JM is the index of the last column
219* which needs to be updated.
220*
221 JM = MIN( JU, J+JB-1 )
222.GT. IF( JMJJ ) THEN
223 CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
224 $ AB( KV, JJ+1 ), LDAB-1,
225 $ AB( KV+1, JJ+1 ), LDAB-1 )
226 END IF
227 END IF
228*
229* Copy current column of A31 into the work array WORK31
230*
231 NW = MIN( JJ-J+1, I3 )
232.GT. IF( NW0 )
233 $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
234 $ WORK31( 1, JJ-J+1 ), 1 )
235 80 CONTINUE
236.LE. IF( J+JBN ) THEN
237*
238* Apply the row interchanges to the other blocks.
239*
240 J2 = MIN( JU-J+1, KV ) - JB
241 J3 = MAX( 0, JU-J-KV+1 )
242*
243* Update the relevant part of the trailing submatrix
244*
245.GT. IF( J20 ) THEN
246*
247* Update A12
248*
249 CALL DTRSM( 'left', 'lower', 'no transpose', 'unit',
250 $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
251 $ AB( KV+1-JB, J+JB ), LDAB-1 )
252*
253.GT. IF( I20 ) THEN
254*
255* Update A22
256*
257 CALL DGEMM( 'no transpose', 'no transpose', I2, J2,
258 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
259 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
260 $ AB( KV+1, J+JB ), LDAB-1 )
261 END IF
262*
263.GT. IF( I30 ) THEN
264*
265* Update A32
266*
267 CALL DGEMM( 'no transpose', 'no transpose', I3, J2,
268 $ JB, -ONE, WORK31, LDWORK,
269 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
270 $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
271 END IF
272 END IF
273*
274.GT. IF( J30 ) THEN
275*
276* Copy the lower triangle of A13 into the work array
277* WORK13
278*
279 DO 130 JJ = 1, J3
280 DO 120 II = JJ, JB
281 WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
282 120 CONTINUE
283 130 CONTINUE
284*
285* Update A13 in the work array
286*
287 CALL DTRSM( 'left', 'lower', 'no transpose', 'unit',
288 $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
289 $ WORK13, LDWORK )
290*
291.GT. IF( I20 ) THEN
292*
293* Update A23
294*
295 CALL DGEMM( 'no transpose', 'no transpose', I2, J3,
296 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
297 $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
298 $ LDAB-1 )
299 END IF
300*
301.GT. IF( I30 ) THEN
302*
303* Update A33
304*
305 CALL DGEMM( 'no transpose', 'no transpose', I3, J3,
306 $ JB, -ONE, WORK31, LDWORK, WORK13,
307 $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
308 END IF
309*
310* Copy the lower triangle of A13 back into place
311*
312 DO 150 JJ = 1, J3
313 DO 140 II = JJ, JB
314 AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
315 140 CONTINUE
316 150 CONTINUE
317 END IF
318 ELSE
319 END IF
320*
321* copy the upper triangle of A31 back into place
322*
323 DO 170 JJ = J + JB - 1, J, -1
324*
325* Copy the current column of A31 back into place
326*
327 NW = MIN( I3, JJ-J+1 )
328.GT. IF( NW0 )
329 $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
330 $ AB( KV+KL+1-JJ+J, JJ ), 1 )
331 170 CONTINUE
332 180 CONTINUE
333 END IF
334*
335 RETURN
336*
337* End of DDBTRF
338*
339 END
subroutine ddbtf2(m, n, kl, ku, ab, ldab, info)
Definition ddbtf2.f:2
subroutine ddbtrf(m, n, kl, ku, ab, ldab, info)
Definition ddbtrf.f:2
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
subroutine dscal(n, da, dx, incx)
DSCAL
Definition dscal.f:79
subroutine dswap(n, dx, incx, dy, incy)
DSWAP
Definition dswap.f:82
subroutine dcopy(n, dx, incx, dy, incy)
DCOPY
Definition dcopy.f:82
subroutine dger(m, n, alpha, x, incx, y, incy, a, lda)
DGER
Definition dger.f:130
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187
subroutine dtrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
DTRSM
Definition dtrsm.f:181
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21