OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
sgeqrf.f
Go to the documentation of this file.
1C> \brief \b SGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8* Definition:
9* ===========
10*
11* SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
12*
13* .. Scalar Arguments ..
14* INTEGER INFO, LDA, LWORK, M, N
15* ..
16* .. Array Arguments ..
17* REAL A( LDA, * ), TAU( * ), WORK( * )
18* ..
19*
20* Purpose
21* =======
22*
23C>\details \b Purpose:
24C>\verbatim
25C>
26C> SGEQRF computes a QR factorization of a real M-by-N matrix A:
27C> A = Q * R.
28C>
29C> This is the left-looking Level 3 BLAS version of the algorithm.
30C>
31C>\endverbatim
32*
33* Arguments:
34* ==========
35*
36C> \param[in] M
37C> \verbatim
38C> M is INTEGER
39C> The number of rows of the matrix A. M >= 0.
40C> \endverbatim
41C>
42C> \param[in] N
43C> \verbatim
44C> N is INTEGER
45C> The number of columns of the matrix A. N >= 0.
46C> \endverbatim
47C>
48C> \param[in,out] A
49C> \verbatim
50C> A is REAL array, dimension (LDA,N)
51C> On entry, the M-by-N matrix A.
52C> On exit, the elements on and above the diagonal of the array
53C> contain the min(M,N)-by-N upper trapezoidal matrix R (R is
54C> upper triangular if m >= n); the elements below the diagonal,
55C> with the array TAU, represent the orthogonal matrix Q as a
56C> product of min(m,n) elementary reflectors (see Further
57C> Details).
58C> \endverbatim
59C>
60C> \param[in] LDA
61C> \verbatim
62C> LDA is INTEGER
63C> The leading dimension of the array A. LDA >= max(1,M).
64C> \endverbatim
65C>
66C> \param[out] TAU
67C> \verbatim
68C> TAU is REAL array, dimension (min(M,N))
69C> The scalar factors of the elementary reflectors (see Further
70C> Details).
71C> \endverbatim
72C>
73C> \param[out] WORK
74C> \verbatim
75C> WORK is REAL array, dimension (MAX(1,LWORK))
76C> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
77C> \endverbatim
78C>
79C> \param[in] LWORK
80C> \verbatim
81C> LWORK is INTEGER
82C> \endverbatim
83C> \verbatim
84C> The dimension of the array WORK. The dimension can be divided into three parts.
85C> \endverbatim
86C> \verbatim
87C> 1) The part for the triangular factor T. If the very last T is not bigger
88C> than any of the rest, then this part is NB x ceiling(K/NB), otherwise,
89C> NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T
90C> \endverbatim
91C> \verbatim
92C> 2) The part for the very last T when T is bigger than any of the rest T.
93C> The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
94C> where K = min(M,N), NX is calculated by
95C> NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) )
96C> \endverbatim
97C> \verbatim
98C> 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
99C> \endverbatim
100C> \verbatim
101C> So LWORK = part1 + part2 + part3
102C> \endverbatim
103C> \verbatim
104C> If LWORK = -1, then a workspace query is assumed; the routine
105C> only calculates the optimal size of the WORK array, returns
106C> this value as the first entry of the WORK array, and no error
107C> message related to LWORK is issued by XERBLA.
108C> \endverbatim
109C>
110C> \param[out] INFO
111C> \verbatim
112C> INFO is INTEGER
113C> = 0: successful exit
114C> < 0: if INFO = -i, the i-th argument had an illegal value
115C> \endverbatim
116C>
117*
118* Authors:
119* ========
120*
121C> \author Univ. of Tennessee
122C> \author Univ. of California Berkeley
123C> \author Univ. of Colorado Denver
124C> \author NAG Ltd.
125*
126C> \date December 2016
127*
128C> \ingroup variantsGEcomputational
129*
130* Further Details
131* ===============
132C>\details \b Further \b Details
133C> \verbatim
134C>
135C> The matrix Q is represented as a product of elementary reflectors
136C>
137C> Q = H(1) H(2) . . . H(k), where k = min(m,n).
138C>
139C> Each H(i) has the form
140C>
141C> H(i) = I - tau * v * v'
142C>
143C> where tau is a real scalar, and v is a real vector with
144C> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
145C> and tau in TAU(i).
146C>
147C> \endverbatim
148C>
149* =====================================================================
150 SUBROUTINE sgeqrf ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
151*
152* -- LAPACK computational routine --
153* -- LAPACK is a software package provided by Univ. of Tennessee, --
154* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
155*
156* .. Scalar Arguments ..
157 INTEGER INFO, LDA, LWORK, M, N
158* ..
159* .. Array Arguments ..
160 REAL A( LDA, * ), TAU( * ), WORK( * )
161* ..
162*
163* =====================================================================
164*
165* .. Local Scalars ..
166 LOGICAL LQUERY
167 INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB,
168 $ NBMIN, NX, LBWORK, NT, LLWORK
169* ..
170* .. External Subroutines ..
171 EXTERNAL sgeqr2, slarfb, slarft, xerbla
172* ..
173* .. Intrinsic Functions ..
174 INTRINSIC max, min
175* ..
176* .. External Functions ..
177 INTEGER ILAENV
178 REAL SCEIL
179 EXTERNAL ilaenv, sceil
180* ..
181* .. Executable Statements ..
182
183 info = 0
184 nbmin = 2
185 nx = 0
186 iws = n
187 k = min( m, n )
188 nb = ilaenv( 1, 'SGEQRF', ' ', m, n, -1, -1 )
189
190 IF( nb.GT.1 .AND. nb.LT.k ) THEN
191*
192* Determine when to cross over from blocked to unblocked code.
193*
194 nx = max( 0, ilaenv( 3, 'SGEQRF', ' ', m, n, -1, -1 ) )
195 END IF
196*
197* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
198*
199* NB=3 2NB=6 K=10
200* | | |
201* 1--2--3--4--5--6--7--8--9--10
202* | \________/
203* K-NX=5 NT=4
204*
205* So here 4 x 4 is the last T stored in the workspace
206*
207 nt = k-sceil(real(k-nx)/real(nb))*nb
208
209*
210* optimal workspace = space for dlarfb + space for normal T's + space for the last T
211*
212 llwork = max(max((n-m)*k, (n-m)*nb), max(k*nb, nb*nb))
213 llwork = sceil(real(llwork)/real(nb))
214
215 IF ( nt.GT.nb ) THEN
216
217 lbwork = k-nt
218*
219* Optimal workspace for dlarfb = MAX(1,N)*NT
220*
221 lwkopt = (lbwork+llwork)*nb
222 work( 1 ) = (lwkopt+nt*nt)
223
224 ELSE
225
226 lbwork = sceil(real(k)/real(nb))*nb
227 lwkopt = (lbwork+llwork-nb)*nb
228 work( 1 ) = lwkopt
229
230 END IF
231
232*
233* Test the input arguments
234*
235 lquery = ( lwork.EQ.-1 )
236 IF( m.LT.0 ) THEN
237 info = -1
238 ELSE IF( n.LT.0 ) THEN
239 info = -2
240 ELSE IF( lda.LT.max( 1, m ) ) THEN
241 info = -4
242 ELSE IF( lwork.LT.max( 1, n ) .AND. .NOT.lquery ) THEN
243 info = -7
244 END IF
245 IF( info.NE.0 ) THEN
246 CALL xerbla( 'SGEQRF', -info )
247 RETURN
248 ELSE IF( lquery ) THEN
249 RETURN
250 END IF
251*
252* Quick return if possible
253*
254 IF( k.EQ.0 ) THEN
255 work( 1 ) = 1
256 RETURN
257 END IF
258*
259 IF( nb.GT.1 .AND. nb.LT.k ) THEN
260
261 IF( nx.LT.k ) THEN
262*
263* Determine if workspace is large enough for blocked code.
264*
265 IF ( nt.LE.nb ) THEN
266 iws = (lbwork+llwork-nb)*nb
267 ELSE
268 iws = (lbwork+llwork)*nb+nt*nt
269 END IF
270
271 IF( lwork.LT.iws ) THEN
272*
273* Not enough workspace to use optimal NB: reduce NB and
274* determine the minimum value of NB.
275*
276 IF ( nt.LE.nb ) THEN
277 nb = lwork / (llwork+(lbwork-nb))
278 ELSE
279 nb = (lwork-nt*nt)/(lbwork+llwork)
280 END IF
281
282 nbmin = max( 2, ilaenv( 2, 'SGEQRF', ' ', m, n, -1,
283 $ -1 ) )
284 END IF
285 END IF
286 END IF
287*
288 IF( nb.GE.nbmin .AND. nb.LT.k .AND. nx.LT.k ) THEN
289*
290* Use blocked code initially
291*
292 DO 10 i = 1, k - nx, nb
293 ib = min( k-i+1, nb )
294*
295* Update the current column using old T's
296*
297 DO 20 j = 1, i - nb, nb
298*
299* Apply H' to A(J:M,I:I+IB-1) from the left
300*
301 CALL slarfb( 'Left', 'Transpose', 'Forward',
302 $ 'Columnwise', m-j+1, ib, nb,
303 $ a( j, j ), lda, work(j), lbwork,
304 $ a( j, i ), lda, work(lbwork*nb+nt*nt+1),
305 $ ib)
306
30720 CONTINUE
308*
309* Compute the QR factorization of the current block
310* A(I:M,I:I+IB-1)
311*
312 CALL sgeqr2( m-i+1, ib, a( i, i ), lda, tau( i ),
313 $ work(lbwork*nb+nt*nt+1), iinfo )
314
315 IF( i+ib.LE.n ) THEN
316*
317* Form the triangular factor of the block reflector
318* H = H(i) H(i+1) . . . H(i+ib-1)
319*
320 CALL slarft( 'Forward', 'Columnwise', m-i+1, ib,
321 $ a( i, i ), lda, tau( i ),
322 $ work(i), lbwork )
323*
324 END IF
325 10 CONTINUE
326 ELSE
327 i = 1
328 END IF
329*
330* Use unblocked code to factor the last or only block.
331*
332 IF( i.LE.k ) THEN
333
334 IF ( i .NE. 1 ) THEN
335
336 DO 30 j = 1, i - nb, nb
337*
338* Apply H' to A(J:M,I:K) from the left
339*
340 CALL slarfb( 'Left', 'Transpose', 'Forward',
341 $ 'Columnwise', m-j+1, k-i+1, nb,
342 $ a( j, j ), lda, work(j), lbwork,
343 $ a( j, i ), lda, work(lbwork*nb+nt*nt+1),
344 $ k-i+1)
34530 CONTINUE
346
347 CALL sgeqr2( m-i+1, k-i+1, a( i, i ), lda, tau( i ),
348 $ work(lbwork*nb+nt*nt+1),iinfo )
349
350 ELSE
351*
352* Use unblocked code to factor the last or only block.
353*
354 CALL sgeqr2( m-i+1, n-i+1, a( i, i ), lda, tau( i ),
355 $ work,iinfo )
356
357 END IF
358 END IF
359
360
361*
362* Apply update to the column M+1:N when N > M
363*
364 IF ( m.LT.n .AND. i.NE.1) THEN
365*
366* Form the last triangular factor of the block reflector
367* H = H(i) H(i+1) . . . H(i+ib-1)
368*
369 IF ( nt .LE. nb ) THEN
370 CALL slarft( 'Forward', 'Columnwise', m-i+1, k-i+1,
371 $ a( i, i ), lda, tau( i ), work(i), lbwork )
372 ELSE
373 CALL slarft( 'Forward', 'Columnwise', m-i+1, k-i+1,
374 $ a( i, i ), lda, tau( i ),
375 $ work(lbwork*nb+1), nt )
376 END IF
377
378*
379* Apply H' to A(1:M,M+1:N) from the left
380*
381 DO 40 j = 1, k-nx, nb
382
383 ib = min( k-j+1, nb )
384
385 CALL slarfb( 'Left', 'Transpose', 'Forward',
386 $ 'Columnwise', m-j+1, n-m, ib,
387 $ a( j, j ), lda, work(j), lbwork,
388 $ a( j, m+1 ), lda, work(lbwork*nb+nt*nt+1),
389 $ n-m)
390
39140 CONTINUE
392
393 IF ( nt.LE.nb ) THEN
394 CALL slarfb( 'Left', 'transpose', 'forward',
395 $ 'columnwise', M-J+1, N-M, K-J+1,
396 $ A( J, J ), LDA, WORK(J), LBWORK,
397 $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
398 $ N-M)
399 ELSE
400 CALL SLARFB( 'left', 'transpose', 'forward',
401 $ 'columnwise', M-J+1, N-M, K-J+1,
402 $ A( J, J ), LDA,
403 $ WORK(LBWORK*NB+1),
404 $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
405 $ N-M)
406 END IF
407
408 END IF
409
410 WORK( 1 ) = IWS
411 RETURN
412*
413* End of SGEQRF
414*
415 END
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
subroutine sgeqr2(m, n, a, lda, tau, work, info)
SGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm.
Definition sgeqr2.f:130
subroutine slarft(direct, storev, n, k, v, ldv, tau, t, ldt)
SLARFT forms the triangular factor T of a block reflector H = I - vtvH
Definition slarft.f:163
subroutine slarfb(side, trans, direct, storev, m, n, k, v, ldv, t, ldt, c, ldc, work, ldwork)
SLARFB applies a block reflector or its transpose to a general rectangular matrix.
Definition slarfb.f:197
subroutine sgeqrf(m, n, a, lda, tau, work, lwork, info)
SGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
Definition sgeqrf.f:151
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21