OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
dlaqr5.f
Go to the documentation of this file.
1*> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DLAQR5 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22* SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23* LDU, NV, WV, LDWV, NH, WH, LDWH )
24*
25* .. Scalar Arguments ..
26* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28* LOGICAL WANTT, WANTZ
29* ..
30* .. Array Arguments ..
31* DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32* $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33* $ Z( LDZ, * )
34* ..
35*
36*
37*> \par Purpose:
38* =============
39*>
40*> \verbatim
41*>
42*> DLAQR5, called by DLAQR0, performs a
43*> single small-bulge multi-shift QR sweep.
44*> \endverbatim
45*
46* Arguments:
47* ==========
48*
49*> \param[in] WANTT
50*> \verbatim
51*> WANTT is LOGICAL
52*> WANTT = .true. if the quasi-triangular Schur factor
53*> is being computed. WANTT is set to .false. otherwise.
54*> \endverbatim
55*>
56*> \param[in] WANTZ
57*> \verbatim
58*> WANTZ is LOGICAL
59*> WANTZ = .true. if the orthogonal Schur factor is being
60*> computed. WANTZ is set to .false. otherwise.
61*> \endverbatim
62*>
63*> \param[in] KACC22
64*> \verbatim
65*> KACC22 is INTEGER with value 0, 1, or 2.
66*> Specifies the computation mode of far-from-diagonal
67*> orthogonal updates.
68*> = 0: DLAQR5 does not accumulate reflections and does not
69*> use matrix-matrix multiply to update far-from-diagonal
70*> matrix entries.
71*> = 1: DLAQR5 accumulates reflections and uses matrix-matrix
72*> multiply to update the far-from-diagonal matrix entries.
73*> = 2: Same as KACC22 = 1. This option used to enable exploiting
74*> the 2-by-2 structure during matrix multiplications, but
75*> this is no longer supported.
76*> \endverbatim
77*>
78*> \param[in] N
79*> \verbatim
80*> N is INTEGER
81*> N is the order of the Hessenberg matrix H upon which this
82*> subroutine operates.
83*> \endverbatim
84*>
85*> \param[in] KTOP
86*> \verbatim
87*> KTOP is INTEGER
88*> \endverbatim
89*>
90*> \param[in] KBOT
91*> \verbatim
92*> KBOT is INTEGER
93*> These are the first and last rows and columns of an
94*> isolated diagonal block upon which the QR sweep is to be
95*> applied. It is assumed without a check that
96*> either KTOP = 1 or H(KTOP,KTOP-1) = 0
97*> and
98*> either KBOT = N or H(KBOT+1,KBOT) = 0.
99*> \endverbatim
100*>
101*> \param[in] NSHFTS
102*> \verbatim
103*> NSHFTS is INTEGER
104*> NSHFTS gives the number of simultaneous shifts. NSHFTS
105*> must be positive and even.
106*> \endverbatim
107*>
108*> \param[in,out] SR
109*> \verbatim
110*> SR is DOUBLE PRECISION array, dimension (NSHFTS)
111*> \endverbatim
112*>
113*> \param[in,out] SI
114*> \verbatim
115*> SI is DOUBLE PRECISION array, dimension (NSHFTS)
116*> SR contains the real parts and SI contains the imaginary
117*> parts of the NSHFTS shifts of origin that define the
118*> multi-shift QR sweep. On output SR and SI may be
119*> reordered.
120*> \endverbatim
121*>
122*> \param[in,out] H
123*> \verbatim
124*> H is DOUBLE PRECISION array, dimension (LDH,N)
125*> On input H contains a Hessenberg matrix. On output a
126*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
127*> to the isolated diagonal block in rows and columns KTOP
128*> through KBOT.
129*> \endverbatim
130*>
131*> \param[in] LDH
132*> \verbatim
133*> LDH is INTEGER
134*> LDH is the leading dimension of H just as declared in the
135*> calling procedure. LDH >= MAX(1,N).
136*> \endverbatim
137*>
138*> \param[in] ILOZ
139*> \verbatim
140*> ILOZ is INTEGER
141*> \endverbatim
142*>
143*> \param[in] IHIZ
144*> \verbatim
145*> IHIZ is INTEGER
146*> Specify the rows of Z to which transformations must be
147*> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
148*> \endverbatim
149*>
150*> \param[in,out] Z
151*> \verbatim
152*> Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ)
153*> If WANTZ = .TRUE., then the QR Sweep orthogonal
154*> similarity transformation is accumulated into
155*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
156*> If WANTZ = .FALSE., then Z is unreferenced.
157*> \endverbatim
158*>
159*> \param[in] LDZ
160*> \verbatim
161*> LDZ is INTEGER
162*> LDA is the leading dimension of Z just as declared in
163*> the calling procedure. LDZ >= N.
164*> \endverbatim
165*>
166*> \param[out] V
167*> \verbatim
168*> V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2)
169*> \endverbatim
170*>
171*> \param[in] LDV
172*> \verbatim
173*> LDV is INTEGER
174*> LDV is the leading dimension of V as declared in the
175*> calling procedure. LDV >= 3.
176*> \endverbatim
177*>
178*> \param[out] U
179*> \verbatim
180*> U is DOUBLE PRECISION array, dimension (LDU,2*NSHFTS)
181*> \endverbatim
182*>
183*> \param[in] LDU
184*> \verbatim
185*> LDU is INTEGER
186*> LDU is the leading dimension of U just as declared in the
187*> in the calling subroutine. LDU >= 2*NSHFTS.
188*> \endverbatim
189*>
190*> \param[in] NV
191*> \verbatim
192*> NV is INTEGER
193*> NV is the number of rows in WV agailable for workspace.
194*> NV >= 1.
195*> \endverbatim
196*>
197*> \param[out] WV
198*> \verbatim
199*> WV is DOUBLE PRECISION array, dimension (LDWV,2*NSHFTS)
200*> \endverbatim
201*>
202*> \param[in] LDWV
203*> \verbatim
204*> LDWV is INTEGER
205*> LDWV is the leading dimension of WV as declared in the
206*> in the calling subroutine. LDWV >= NV.
207*> \endverbatim
208*
209*> \param[in] NH
210*> \verbatim
211*> NH is INTEGER
212*> NH is the number of columns in array WH available for
213*> workspace. NH >= 1.
214*> \endverbatim
215*>
216*> \param[out] WH
217*> \verbatim
218*> WH is DOUBLE PRECISION array, dimension (LDWH,NH)
219*> \endverbatim
220*>
221*> \param[in] LDWH
222*> \verbatim
223*> LDWH is INTEGER
224*> Leading dimension of WH just as declared in the
225*> calling procedure. LDWH >= 2*NSHFTS.
226*> \endverbatim
227*>
228* Authors:
229* ========
230*
231*> \author Univ. of Tennessee
232*> \author Univ. of California Berkeley
233*> \author Univ. of Colorado Denver
234*> \author NAG Ltd.
235*
236*> \ingroup doubleOTHERauxiliary
237*
238*> \par Contributors:
239* ==================
240*>
241*> Karen Braman and Ralph Byers, Department of Mathematics,
242*> University of Kansas, USA
243*>
244*> Lars Karlsson, Daniel Kressner, and Bruno Lang
245*>
246*> Thijs Steel, Department of Computer science,
247*> KU Leuven, Belgium
248*
249*> \par References:
250* ================
251*>
252*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
253*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
254*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
255*> 929--947, 2002.
256*>
257*> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
258*> chains of bulges in multishift QR algorithms.
259*> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
260*>
261* =====================================================================
262 SUBROUTINE dlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
263 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
264 $ LDU, NV, WV, LDWV, NH, WH, LDWH )
265 IMPLICIT NONE
266*
267* -- LAPACK auxiliary routine --
268* -- LAPACK is a software package provided by Univ. of Tennessee, --
269* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
270*
271* .. Scalar Arguments ..
272 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
273 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
274 LOGICAL WANTT, WANTZ
275* ..
276* .. Array Arguments ..
277 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
278 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
279 $ z( ldz, * )
280* ..
281*
282* ================================================================
283* .. Parameters ..
284 DOUBLE PRECISION ZERO, ONE
285 PARAMETER ( ZERO = 0.0d0, one = 1.0d0 )
286* ..
287* .. Local Scalars ..
288 DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
289 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
290 $ ulp
291 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
292 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
293 $ m, m22, mbot, mtop, nbmps, ndcol,
294 $ ns, nu
295 LOGICAL ACCUM, BMP22
296* ..
297* .. External Functions ..
298 DOUBLE PRECISION DLAMCH
299 EXTERNAL DLAMCH
300* ..
301* .. Intrinsic Functions ..
302*
303 INTRINSIC abs, dble, max, min, mod
304* ..
305* .. Local Arrays ..
306 DOUBLE PRECISION VT( 3 )
307* ..
308* .. External Subroutines ..
309 EXTERNAL dgemm, dlabad, dlacpy, dlaqr1, dlarfg, dlaset,
310 $ dtrmm
311* ..
312* .. Executable Statements ..
313*
314* ==== If there are no shifts, then there is nothing to do. ====
315*
316 IF( nshfts.LT.2 )
317 $ RETURN
318*
319* ==== If the active block is empty or 1-by-1, then there
320* . is nothing to do. ====
321*
322 IF( ktop.GE.kbot )
323 $ RETURN
324*
325* ==== Shuffle shifts into pairs of real shifts and pairs
326* . of complex conjugate shifts assuming complex
327* . conjugate shifts are already adjacent to one
328* . another. ====
329*
330 DO 10 i = 1, nshfts - 2, 2
331 IF( si( i ).NE.-si( i+1 ) ) THEN
332*
333 swap = sr( i )
334 sr( i ) = sr( i+1 )
335 sr( i+1 ) = sr( i+2 )
336 sr( i+2 ) = swap
337*
338 swap = si( i )
339 si( i ) = si( i+1 )
340 si( i+1 ) = si( i+2 )
341 si( i+2 ) = swap
342 END IF
343 10 CONTINUE
344*
345* ==== NSHFTS is supposed to be even, but if it is odd,
346* . then simply reduce it by one. The shuffle above
347* . ensures that the dropped shift is real and that
348* . the remaining shifts are paired. ====
349*
350 ns = nshfts - mod( nshfts, 2 )
351*
352* ==== Machine constants for deflation ====
353*
354 safmin = dlamch( 'SAFE MINIMUM' )
355 safmax = one / safmin
356 CALL dlabad( safmin, safmax )
357 ulp = dlamch( 'PRECISION' )
358 smlnum = safmin*( dble( n ) / ulp )
359*
360* ==== Use accumulated reflections to update far-from-diagonal
361* . entries ? ====
362*
363 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
364*
365* ==== clear trash ====
366*
367 IF( ktop+2.LE.kbot )
368 $ h( ktop+2, ktop ) = zero
369*
370* ==== NBMPS = number of 2-shift bulges in the chain ====
371*
372 nbmps = ns / 2
373*
374* ==== KDU = width of slab ====
375*
376 kdu = 4*nbmps
377*
378* ==== Create and chase chains of NBMPS bulges ====
379*
380 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
381*
382* JTOP = Index from which updates from the right start.
383*
384 IF( accum ) THEN
385 jtop = max( ktop, incol )
386 ELSE IF( wantt ) THEN
387 jtop = 1
388 ELSE
389 jtop = ktop
390 END IF
391*
392 ndcol = incol + kdu
393 IF( accum )
394 $ CALL dlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
395*
396* ==== Near-the-diagonal bulge chase. The following loop
397* . performs the near-the-diagonal part of a small bulge
398* . multi-shift QR sweep. Each 4*NBMPS column diagonal
399* . chunk extends from column INCOL to column NDCOL
400* . (including both column INCOL and column NDCOL). The
401* . following loop chases a 2*NBMPS+1 column long chain of
402* . NBMPS bulges 2*NBMPS columns to the right. (INCOL
403* . may be less than KTOP and and NDCOL may be greater than
404* . KBOT indicating phantom columns from which to chase
405* . bulges before they are actually introduced or to which
406* . to chase bulges beyond column KBOT.) ====
407*
408 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
409*
410* ==== Bulges number MTOP to MBOT are active double implicit
411* . shift bulges. There may or may not also be small
412* . 2-by-2 bulge, if there is room. The inactive bulges
413* . (if any) must wait until the active bulges have moved
414* . down the diagonal to make room. The phantom matrix
415* . paradigm described above helps keep track. ====
416*
417 mtop = max( 1, ( ktop-krcol ) / 2+1 )
418 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
419 m22 = mbot + 1
420 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
421 $ ( kbot-2 )
422*
423* ==== Generate reflections to chase the chain right
424* . one column. (The minimum value of K is KTOP-1.) ====
425*
426 IF ( bmp22 ) THEN
427*
428* ==== Special case: 2-by-2 reflection at bottom treated
429* . separately ====
430*
431 k = krcol + 2*( m22-1 )
432 IF( k.EQ.ktop-1 ) THEN
433 CALL dlaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
434 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
435 $ v( 1, m22 ) )
436 beta = v( 1, m22 )
437 CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
438 ELSE
439 beta = h( k+1, k )
440 v( 2, m22 ) = h( k+2, k )
441 CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
442 h( k+1, k ) = beta
443 h( k+2, k ) = zero
444 END IF
445
446*
447* ==== Perform update from right within
448* . computational window. ====
449*
450 DO 30 j = jtop, min( kbot, k+3 )
451 refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
452 $ h( j, k+2 ) )
453 h( j, k+1 ) = h( j, k+1 ) - refsum
454 h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
455 30 CONTINUE
456*
457* ==== Perform update from left within
458* . computational window. ====
459*
460 IF( accum ) THEN
461 jbot = min( ndcol, kbot )
462 ELSE IF( wantt ) THEN
463 jbot = n
464 ELSE
465 jbot = kbot
466 END IF
467 DO 40 j = k+1, jbot
468 refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
469 $ h( k+2, j ) )
470 h( k+1, j ) = h( k+1, j ) - refsum
471 h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
472 40 CONTINUE
473*
474* ==== The following convergence test requires that
475* . the tradition small-compared-to-nearby-diagonals
476* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
477* . criteria both be satisfied. The latter improves
478* . accuracy in some examples. Falling back on an
479* . alternate convergence criterion when TST1 or TST2
480* . is zero (as done here) is traditional but probably
481* . unnecessary. ====
482*
483 IF( k.GE.ktop ) THEN
484 IF( h( k+1, k ).NE.zero ) THEN
485 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
486 IF( tst1.EQ.zero ) THEN
487 IF( k.GE.ktop+1 )
488 $ tst1 = tst1 + abs( h( k, k-1 ) )
489 IF( k.GE.ktop+2 )
490 $ tst1 = tst1 + abs( h( k, k-2 ) )
491 IF( k.GE.ktop+3 )
492 $ tst1 = tst1 + abs( h( k, k-3 ) )
493 IF( k.LE.kbot-2 )
494 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
495 IF( k.LE.kbot-3 )
496 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
497 IF( k.LE.kbot-4 )
498 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
499 END IF
500 IF( abs( h( k+1, k ) )
501 $ .LE.max( smlnum, ulp*tst1 ) ) THEN
502 h12 = max( abs( h( k+1, k ) ),
503 $ abs( h( k, k+1 ) ) )
504 h21 = min( abs( h( k+1, k ) ),
505 $ abs( h( k, k+1 ) ) )
506 h11 = max( abs( h( k+1, k+1 ) ),
507 $ abs( h( k, k )-h( k+1, k+1 ) ) )
508 h22 = min( abs( h( k+1, k+1 ) ),
509 $ abs( h( k, k )-h( k+1, k+1 ) ) )
510 scl = h11 + h12
511 tst2 = h22*( h11 / scl )
512*
513 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
514 $ max( smlnum, ulp*tst2 ) ) THEN
515 h( k+1, k ) = zero
516 END IF
517 END IF
518 END IF
519 END IF
520*
521* ==== Accumulate orthogonal transformations. ====
522*
523 IF( accum ) THEN
524 kms = k - incol
525 DO 50 j = max( 1, ktop-incol ), kdu
526 refsum = v( 1, m22 )*( u( j, kms+1 )+
527 $ v( 2, m22 )*u( j, kms+2 ) )
528 u( j, kms+1 ) = u( j, kms+1 ) - refsum
529 u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m22 )
530 50 CONTINUE
531 ELSE IF( wantz ) THEN
532 DO 60 j = iloz, ihiz
533 refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
534 $ z( j, k+2 ) )
535 z( j, k+1 ) = z( j, k+1 ) - refsum
536 z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
537 60 CONTINUE
538 END IF
539 END IF
540*
541* ==== Normal case: Chain of 3-by-3 reflections ====
542*
543 DO 80 m = mbot, mtop, -1
544 k = krcol + 2*( m-1 )
545 IF( k.EQ.ktop-1 ) THEN
546 CALL dlaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
547 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
548 $ v( 1, m ) )
549 alpha = v( 1, m )
550 CALL dlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
551 ELSE
552*
553* ==== Perform delayed transformation of row below
554* . Mth bulge. Exploit fact that first two elements
555* . of row are actually zero. ====
556*
557 refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
558 h( k+3, k ) = -refsum
559 h( k+3, k+1 ) = -refsum*v( 2, m )
560 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m )
561*
562* ==== Calculate reflection to move
563* . Mth bulge one step. ====
564*
565 beta = h( k+1, k )
566 v( 2, m ) = h( k+2, k )
567 v( 3, m ) = h( k+3, k )
568 CALL dlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
569*
570* ==== A Bulge may collapse because of vigilant
571* . deflation or destructive underflow. In the
572* . underflow case, try the two-small-subdiagonals
573* . trick to try to reinflate the bulge. ====
574*
575 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
576 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
577*
578* ==== Typical case: not collapsed (yet). ====
579*
580 h( k+1, k ) = beta
581 h( k+2, k ) = zero
582 h( k+3, k ) = zero
583 ELSE
584*
585* ==== Atypical case: collapsed. Attempt to
586* . reintroduce ignoring H(K+1,K) and H(K+2,K).
587* . If the fill resulting from the new
588* . reflector is too large, then abandon it.
589* . Otherwise, use the new one. ====
590*
591 CALL dlaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
592 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
593 $ vt )
594 alpha = vt( 1 )
595 CALL dlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
596 refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
597 $ h( k+2, k ) )
598*
599 IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
600 $ abs( refsum*vt( 3 ) ).GT.ulp*
601 $ ( abs( h( k, k ) )+abs( h( k+1,
602 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
603*
604* ==== Starting a new bulge here would
605* . create non-negligible fill. Use
606* . the old one with trepidation. ====
607*
608 h( k+1, k ) = beta
609 h( k+2, k ) = zero
610 h( k+3, k ) = zero
611 ELSE
612*
613* ==== Starting a new bulge here would
614* . create only negligible fill.
615* . Replace the old reflector with
616* . the new one. ====
617*
618 h( k+1, k ) = h( k+1, k ) - refsum
619 h( k+2, k ) = zero
620 h( k+3, k ) = zero
621 v( 1, m ) = vt( 1 )
622 v( 2, m ) = vt( 2 )
623 v( 3, m ) = vt( 3 )
624 END IF
625 END IF
626 END IF
627*
628* ==== Apply reflection from the right and
629* . the first column of update from the left.
630* . These updates are required for the vigilant
631* . deflation check. We still delay most of the
632* . updates from the left for efficiency. ====
633*
634 DO 70 j = jtop, min( kbot, k+3 )
635 refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
636 $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
637 h( j, k+1 ) = h( j, k+1 ) - refsum
638 h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
639 h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
640 70 CONTINUE
641*
642* ==== Perform update from left for subsequent
643* . column. ====
644*
645 refsum = v( 1, m )*( h( k+1, k+1 )+v( 2, m )*
646 $ h( k+2, k+1 )+v( 3, m )*h( k+3, k+1 ) )
647 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
648 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m )
649 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m )
650*
651* ==== The following convergence test requires that
652* . the tradition small-compared-to-nearby-diagonals
653* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
654* . criteria both be satisfied. The latter improves
655* . accuracy in some examples. Falling back on an
656* . alternate convergence criterion when TST1 or TST2
657* . is zero (as done here) is traditional but probably
658* . unnecessary. ====
659*
660 IF( k.LT.ktop)
661 $ cycle
662 IF( h( k+1, k ).NE.zero ) THEN
663 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
664 IF( tst1.EQ.zero ) THEN
665 IF( k.GE.ktop+1 )
666 $ tst1 = tst1 + abs( h( k, k-1 ) )
667 IF( k.GE.ktop+2 )
668 $ tst1 = tst1 + abs( h( k, k-2 ) )
669 IF( k.GE.ktop+3 )
670 $ tst1 = tst1 + abs( h( k, k-3 ) )
671 IF( k.LE.kbot-2 )
672 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
673 IF( k.LE.kbot-3 )
674 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
675 IF( k.LE.kbot-4 )
676 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
677 END IF
678 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
679 $ THEN
680 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
681 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
682 h11 = max( abs( h( k+1, k+1 ) ),
683 $ abs( h( k, k )-h( k+1, k+1 ) ) )
684 h22 = min( abs( h( k+1, k+1 ) ),
685 $ abs( h( k, k )-h( k+1, k+1 ) ) )
686 scl = h11 + h12
687 tst2 = h22*( h11 / scl )
688*
689 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
690 $ max( smlnum, ulp*tst2 ) ) THEN
691 h( k+1, k ) = zero
692 END IF
693 END IF
694 END IF
695 80 CONTINUE
696*
697* ==== Multiply H by reflections from the left ====
698*
699 IF( accum ) THEN
700 jbot = min( ndcol, kbot )
701 ELSE IF( wantt ) THEN
702 jbot = n
703 ELSE
704 jbot = kbot
705 END IF
706*
707 DO 100 m = mbot, mtop, -1
708 k = krcol + 2*( m-1 )
709 DO 90 j = max( ktop, krcol + 2*m ), jbot
710 refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
711 $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
712 h( k+1, j ) = h( k+1, j ) - refsum
713 h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
714 h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
715 90 CONTINUE
716 100 CONTINUE
717*
718* ==== Accumulate orthogonal transformations. ====
719*
720 IF( accum ) THEN
721*
722* ==== Accumulate U. (If needed, update Z later
723* . with an efficient matrix-matrix
724* . multiply.) ====
725*
726 DO 120 m = mbot, mtop, -1
727 k = krcol + 2*( m-1 )
728 kms = k - incol
729 i2 = max( 1, ktop-incol )
730 i2 = max( i2, kms-(krcol-incol)+1 )
731 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
732 DO 110 j = i2, i4
733 refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
734 $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
735 u( j, kms+1 ) = u( j, kms+1 ) - refsum
736 u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
737 u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
738 110 CONTINUE
739 120 CONTINUE
740 ELSE IF( wantz ) THEN
741*
742* ==== U is not accumulated, so update Z
743* . now by multiplying by reflections
744* . from the right. ====
745*
746 DO 140 m = mbot, mtop, -1
747 k = krcol + 2*( m-1 )
748 DO 130 j = iloz, ihiz
749 refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
750 $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
751 z( j, k+1 ) = z( j, k+1 ) - refsum
752 z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
753 z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
754 130 CONTINUE
755 140 CONTINUE
756 END IF
757*
758* ==== End of near-the-diagonal bulge chase. ====
759*
760 145 CONTINUE
761*
762* ==== Use U (if accumulated) to update far-from-diagonal
763* . entries in H. If required, use U to update Z as
764* . well. ====
765*
766 IF( accum ) THEN
767 IF( wantt ) THEN
768 jtop = 1
769 jbot = n
770 ELSE
771 jtop = ktop
772 jbot = kbot
773 END IF
774 k1 = max( 1, ktop-incol )
775 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
776*
777* ==== Horizontal Multiply ====
778*
779 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
780 jlen = min( nh, jbot-jcol+1 )
781 CALL dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
782 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
783 $ ldwh )
784 CALL dlacpy( 'ALL', nu, jlen, wh, ldwh,
785 $ h( incol+k1, jcol ), ldh )
786 150 CONTINUE
787*
788* ==== Vertical multiply ====
789*
790 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
791 jlen = min( nv, max( ktop, incol )-jrow )
792 CALL dgemm( 'N', 'N', jlen, nu, nu, one,
793 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
794 $ ldu, zero, wv, ldwv )
795 CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
796 $ h( jrow, incol+k1 ), ldh )
797 160 CONTINUE
798*
799* ==== Z multiply (also vertical) ====
800*
801 IF( wantz ) THEN
802 DO 170 jrow = iloz, ihiz, nv
803 jlen = min( nv, ihiz-jrow+1 )
804 CALL dgemm( 'N', 'N', jlen, nu, nu, one,
805 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
806 $ ldu, zero, wv, ldwv )
807 CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
808 $ z( jrow, incol+k1 ), ldz )
809 170 CONTINUE
810 END IF
811 END IF
812 180 CONTINUE
813*
814* ==== End of DLAQR5 ====
815*
816 END
subroutine dlabad(small, large)
DLABAD
Definition dlabad.f:74
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:103
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
subroutine dlaqr1(n, h, ldh, sr1, si1, sr2, si2, v)
DLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition dlaqr1.f:121
subroutine dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv, wv, ldwv, nh, wh, ldwh)
DLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition dlaqr5.f:265
subroutine dlarfg(n, alpha, x, incx, tau)
DLARFG generates an elementary reflector (Householder matrix).
Definition dlarfg.f:106
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187
subroutine dtrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
DTRMM
Definition dtrmm.f:177
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21