OpenRadioss 2025.1.11
OpenRadioss project
pzhettrd.f File Reference


Functions/Subroutines

subroutine pzhettrd (uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)

Function/Subroutine Documentation

◆ pzhettrd()

subroutine pzhettrd ( character uplo,
integer n,
complex*16, dimension( * ) a,
integer ia,
integer ja,
integer, dimension( * ) desca,
double precision, dimension( * ) d,
double precision, dimension( * ) e,
complex*16, dimension( * ) tau,
complex*16, dimension( * ) work,
integer lwork,
integer info )

Definition at line 1 of file pzhettrd.f.
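A minimal call-site sketch (illustrative only, not taken from the OpenRadioss or ScaLAPACK sources; the header below recommends calling PZHETRD rather than this routine directly, and the array names and workspace used here are placeholders), assuming a square NPROW x NPCOL process grid and a cyclic descriptor with MB = NB = 1 as required below:

      INTEGER            ANB, NPS, LWMIN, INFO
*     Workspace sizing, following the LWORK formula documented below
*     ( DESCA( 2 ) is the BLACS context entry CTXT_ ).
      ANB   = PJLAENV( DESCA( 2 ), 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
      NPS   = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
      LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS
*     Only UPLO = 'L' and IA = JA = 1 are accepted (see the argument
*     checks in the listing below).
      CALL PZHETTRD( 'L', N, A, 1, 1, DESCA, D, E, TAU, WORK, LWMIN,
     $               INFO )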

3*
4* -- ScaLAPACK routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER UPLO
10 INTEGER IA, INFO, JA, LWORK, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 DOUBLE PRECISION D( * ), E( * )
15 COMPLEX*16 A( * ), TAU( * ), WORK( * )
16* ..
17*
18* Purpose
19*
20* =======
21*
22* PZHETTRD reduces a complex Hermitian matrix sub( A ) to Hermitian
23* tridiagonal form T by a unitary similarity transformation:
24* Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
25*
26* Notes
27* =====
28*
29* Each global data object is described by an associated description
30* vector. This vector stores the information required to establish
31* the mapping between an object element and its corresponding
32* process and memory location.
33*
34* Let A be a generic term for any 2D block cyclically distributed
35* array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- -----------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle,
45* indicating the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to
54* distribute the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to
56* distribute the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the
58* first row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCp(M_A)).
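*
*  Illustrative sketch (not part of the original header): the context
*  entry CTXT_A is what the executable code below uses to recover the
*  process grid, e.g.
*     ICTXT = DESCA( CTXT_ )
*     CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )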
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCp( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCq( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes
72* of its process row.
73* The values of LOCp() and LOCq() may be determined via a call to
74* the ScaLAPACK tool function, NUMROC:
75* LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
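*
*  Illustrative sketch (not part of the original header): for the
*  cyclic distribution this routine requires ( MB_A = NB_A = 1,
*  RSRC_A = CSRC_A = 0 ), the local extents used in the code below
*  reduce to
*     NP = NUMROC( N, 1, MYROW, 0, NPROW )
*     NQ = NUMROC( N, 1, MYCOL, 0, NPCOL )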
80*
81* Arguments
82* =========
83*
84* UPLO (global input) CHARACTER
85* Specifies whether the upper or lower triangular part of the
86* Hermitian matrix sub( A ) is stored:
87* = 'U': Upper triangular
88* = 'L': Lower triangular
89*
90* N (global input) INTEGER
91* The number of rows and columns to be operated on, i.e. the
92* order of the distributed submatrix sub( A ). N >= 0.
93*
94* A (local input/local output) COMPLEX*16 pointer into the
95* local memory to an array of dimension (LLD_A,LOCq(JA+N-1)).
96* On entry, this array contains the local pieces of the
97* Hermitian distributed matrix sub( A ). If UPLO = 'U', the
98* leading N-by-N upper triangular part of sub( A ) contains
99* the upper triangular part of the matrix, and its strictly
100* lower triangular part is not referenced. If UPLO = 'L', the
101* leading N-by-N lower triangular part of sub( A ) contains the
102* lower triangular part of the matrix, and its strictly upper
103* triangular part is not referenced. On exit, if UPLO = 'U',
104* the diagonal and first superdiagonal of sub( A ) are over-
105* written by the corresponding elements of the tridiagonal
106* matrix T, and the elements above the first superdiagonal,
107* with the array TAU, represent the unitary matrix Q as a
108* product of elementary reflectors; if UPLO = 'L', the diagonal
109* and first subdiagonal of sub( A ) are overwritten by the
110* corresponding elements of the tridiagonal matrix T, and the
111* elements below the first subdiagonal, with the array TAU,
112* represent the unitary matrix Q as a product of elementary
113* reflectors. See Further Details.
114*
115* IA (global input) INTEGER
116* The row index in the global array A indicating the first
117* row of sub( A ).
118*
119* JA (global input) INTEGER
120* The column index in the global array A indicating the
121* first column of sub( A ).
122*
123* DESCA (global and local input) INTEGER array of dimension DLEN_.
124* The array descriptor for the distributed matrix A.
125*
126* D (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
127* The diagonal elements of the tridiagonal matrix T:
128* D(i) = A(i,i). D is tied to the distributed matrix A.
129*
130* E (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
131* if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal
132* elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
133* UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
134* distributed matrix A.
135*
136* TAU (local output) COMPLEX*16, array, dimension
137* LOCq(JA+N-1). This array contains the scalar factors TAU of
138* the elementary reflectors. TAU is tied to the distributed
139* matrix A.
140*
141* WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
142* On exit, WORK( 1 ) returns the minimal and optimal workspace
143*
144* LWORK (local input) INTEGER
145* The dimension of the array WORK.
146* LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS
147* Where:
148* NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
149* ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PZHETTRD', 'L', 0, 0,
150* 0, 0 )
151*
152* NUMROC is a ScaLAPACK tool function;
153* PJLAENV is a ScaLAPACK environmental inquiry function
154* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
155* the subroutine BLACS_GRIDINFO.
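*
*          Illustrative sketch (not part of the original header): this
*          is exactly the workspace test performed in the argument
*          checks below,
*             NPS   = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
*             LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS
*             IF( LWORK.LT.LWMIN ) INFO = -11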
156*
157* INFO (global output) INTEGER
158* = 0: successful exit
159* < 0: If the i-th argument is an array and the j-th entry had
160* an illegal value, then INFO = -(i*100+j), if the i-th
161* argument is a scalar and had an illegal value, then
162* INFO = -i.
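*
*          For instance, following the checks in the executable code
*          below, an illegal blocking factor DESCA( NB_ ) (entry j = 6
*          of argument i = 6) is reported as INFO = -(6*100+6) = -606.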
163*
164* Further Details
165* ===============
166*
167* If UPLO = 'U', the matrix Q is represented as a product of
168* elementary reflectors
169*
170* Q = H(n-1) . . . H(2) H(1).
171*
172* Each H(i) has the form
173*
174* H(i) = I - tau * v * v'
175*
176* where tau is a complex scalar, and v is a complex vector with
177* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
178* A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
179*
180* If UPLO = 'L', the matrix Q is represented as a product of
181* elementary reflectors
182*
183* Q = H(1) H(2) . . . H(n-1).
184*
185* Each H(i) has the form
186*
187* H(i) = I - tau * v * v'
188*
189* where tau is a complex scalar, and v is a complex vector with
190* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
191* A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
192*
193* The contents of sub( A ) on exit are illustrated by the following
194* examples with n = 5:
195*
196* if UPLO = 'U':                       if UPLO = 'L':
197*
198* ( d   e   v2  v3  v4 )               ( d                  )
199* (     d   e   v3  v4 )               ( e   d              )
200* (         d   e   v4 )               ( v1  e   d          )
201* (             d   e  )               ( v1  v2  e   d      )
202* (                 d  )               ( v1  v2  v3  e   d  )
203*
204* where d and e denote diagonal and off-diagonal elements of T, and
205* vi denotes an element of the vector defining H(i).
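*
*  Illustrative sketch (not part of the original header): applying one
*  reflector H(i) = I - tau*v*v' to a local vector X of length M
*  reduces, by the formula above, to two passes over v (all names here
*  are placeholders):
*        ALPHA = Z_ZERO
*        DO 1 K = 1, M
*           ALPHA = ALPHA + DCONJG( V( K ) )*X( K )
*    1   CONTINUE
*        DO 2 K = 1, M
*           X( K ) = X( K ) - TAU*V( K )*ALPHA
*    2   CONTINUE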
206*
207* Data storage requirements
208* =========================
209*
210* PZHETTRD is not intended to be called directly. All users are
211* encouraged to call PZHETRD which will then call PZHETTRD if
212* appropriate. A must be in cyclic format (i.e. MB = NB = 1),
213* the process grid must be square ( i.e. NPROW = NPCOL ) and
214* only lower triangular storage is supported.
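*
*  Illustrative sketch (not part of the original header): a descriptor
*  meeting these restrictions can be built with the ScaLAPACK tool
*  routine DESCINIT, e.g.
*     NP = NUMROC( N, 1, MYROW, 0, NPROW )
*     CALL DESCINIT( DESCA, N, N, 1, 1, 0, 0, ICTXT, MAX( 1, NP ),
*    $               INFO )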
215*
216* Local variables
217* ===============
218*
219* PZHETTRD uses five local arrays:
220* WORK ( InV ) dimension ( NP, ANB+1): array V
221* WORK ( InH ) dimension ( NP, ANB+1): array H
222* WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V
223* WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H
224* WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT
225*
226* Arrays V and H are replicated across all processor columns.
227* Arrays V^T and H^T are replicated across all processor rows.
228*
229* WORK ( InVT ), or V^T, is stored as a tall skinny
230* array ( NQ x ANB-1 ) for efficiency. Since only the lower
231* triangular portion of A is updated, Av is computed as:
232* tril(A) * v + v^T * tril(A,-1). This is performed as
233* two local triangular matrix-vector multiplications (both in
234* MVR2) followed by a transpose and a sum across the columns.
235* In the local computation, WORK( InVT ) is used to compute
236* tril(A) * v and WORK( InV ) is used to compute
237* v^T * tril(A,-1)
238*
239* The following variables are global indices into A:
240* INDEX: The current global row and column number.
241* MAXINDEX: The global row and column for the first row and
242* column in the trailing block of A.
243* LIIB, LIJB: The first row, column in
244*
245* The following variables point into the arrays A, V, H, V^T, H^T:
246* BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T.
247* LII: local index I: The local row number for row INDEX
248* LIJ: local index J: The local column number for column INDEX
249* LIIP1: local index I+1: The local row number for row INDEX+1
250* LIJP1: local index J+1: The local col number for col INDEX+1
251* LTLI: lower triangular local index I: The local row for the
252* upper left entry in tril( A(INDEX, INDEX) )
253* LTLIP1: lower triangular local index I+1: The local row for the
254* upper left entry in tril( A(INDEX+1, INDEX+1) )
255*
256* Details: The distinction between LII and LTLI (and between
257* LIIP1 and LTLIP1) is subtle. Within the current processor
258* column (i.e. MYCOL .eq. CURCOL) they are the same. However,
259* on some processors, A( LII, LIJ ) points to an element
260* above the diagonal, on these processors, LTLI = LII+1.
261*
262* The following variables give the number of rows and/or columns
263* in various matrices:
264* NP: The number of local rows in A( 1:N, 1:N )
265* NQ: The number of local columns in A( 1:N, 1:N )
266* NPM0: The number of local rows in A( INDEX:N, INDEX:N )
267* NQM0: The number of local columns in A( INDEX:N, INDEX:N )
268* NPM1: The number of local rows in A( INDEX+1:N, INDEX:N )
269* NQM1: The number of local columns in A( INDEX+1:N, INDEX:N )
270* LTNM0: The number of local rows & columns in
271* tril( A( INDEX:N, INDEX:N ) )
272* LTNM1: The number of local rows & columns in
273* tril( A( INDEX+1:N, INDEX+1:N ) )
274* NOTE: LTNM0 == LTNM1 on all processors except the diagonal
275* processors, i.e. those where MYCOL == MYROW.
276*
277* Invariants:
278* NP = NPM0 + LII - 1
279* NQ = NQM0 + LIJ - 1
280* NP = NPM1 + LIIP1 - 1
281* NQ = NQM1 + LIJP1 - 1
282* NP = LTLI + LTNM0 - 1
283* NP = LTLIP1 + LTNM1 - 1
284*
285* Temporary variables. The following variables are used within
286* a few lines after they are set and do not hold state from one loop
287* iteration to the next:
288*
289* The matrix A:
290* The matrix A does not hold the same values that it would
291* in an unblocked code nor the values that it would hold in
292* a blocked code.
293*
294* The value of A is confusing. It is easiest to state the
295* difference between trueA and A at the point that MVR2 is called,
296* so we will start there.
297*
298* Let trueA be the value that A would
299* have at a given point in an unblocked code and A
300* be the value that A has in this code at the same point.
301*
302* At the time of the call to MVR2,
303* trueA = A + V' * H + H' * V
304* where H = H( MAXINDEX:N, 1:BINDEX ) and
305* V = V( MAXINDEX:N, 1:BINDEX ).
306*
307* At the bottom of the inner loop,
308* trueA = A + V' * H + H' * V + v' * h + h' * v
309* where H = H( MAXINDEX:N, 1:BINDEX ) and
310* V = V( MAXINDEX:N, 1:BINDEX ) and
311* v = V( liip1:N, BINDEX+1 ) and
312* h = H( liip1:N, BINDEX+1 )
313*
314* At the top of the loop, BINDEX gets incremented, hence:
315* trueA = A + V' * H + H' * V + v' * h + h' * v
316* where H = H( MAXINDEX:N, 1:BINDEX-1 ) and
317* V = V( MAXINDEX:N, 1:BINDEX-1 ) and
318* v = V( liip1:N, BINDEX ) and
319* h = H( liip1:N, BINDEX )
320*
321*
322* A gets updated at the bottom of the outer loop
323* After this update, trueA = A + v' * h + h' * v
324* where v = V( liip1:N, BINDEX ) and
325* h = H( liip1:N, BINDEX ) and BINDEX = 0
326* Indeed, the previous loop invariant as stated above for the
327* top of the loop still holds, but with BINDEX = 0, H and V
328* are null matrices.
329*
330* After the current column of A is updated,
331* trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N )
332* the rest of A is untouched.
333*
334* After the current block column of A is updated,
335* trueA = A + V' * H + H' * V
336* where H = H( MAXINDEX:N, 1:BINDEX ) and
337* V = V( MAXINDEX:N, 1:BINDEX )
338*
339* This brings us back to the point at which mvr2 is called.
340*
341*
342* Details of the parallelization:
343*
344* We delay spreading v across to all processor columns (which
345* would naturally happen at the bottom of the loop) in order to
346* combine the spread of v( : , i-1 ) with the spread of h( : , i )
347*
348* In order to compute h( :, i ), we must update A( :, i )
349* which means that the processor column owning A( :, i ) must
350* have: c, tau, v( i, i ) and h( i, i ).
351*
352* The traditional
353* way of computing v (and the one used in pzlatrd.f and
354* zlatrd.f) is:
355* v = tau * v
356* c = v' * h
357* alpha = - tau * c / 2
358* v = v + alpha * h
359* However, the traditional way of computing v requires that tau
360* be broadcast to all processors in the current column (to compute
361* v = tau * v) and then a sum-to-all is required (to
362* compute v' * h ). We use the following formula instead:
363* c = v' * h
364* v = tau * ( v - c * tau' * h / 2 )
365* The above formula allows tau to be spread down in the
366* same call to DGSUM2D which performs the sum-to-all of c.
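*
*       Written out element-wise (illustrative, not part of the
*       original header; this is the form used in the executable code
*       further below, with M the local vector length):
*          C = Z_ZERO
*          DO 3 K = 1, M
*             C = C + DCONJG( V( K ) )*H( K )
*    3     CONTINUE
*          DO 4 K = 1, M
*             V( K ) = TAU*( V( K ) - C*DCONJG( TAU ) / 2*H( K ) )
*    4     CONTINUE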
367*
368* The computation of v, which could be performed in any processor
369* column (or other processor subsets), is performed in the
370* processor column that owns A( :, i+1 ) so that A( :, i+1 )
371* can be updated prior to spreading v across.
372*
373* We keep the block column of A up-to-date to minimize the
374* work required in updating the current column of A. Updating
375* the block column of A is reasonably load balanced whereas
376* updating the current column of A is not (only the current
377* processor column is involved).
378*
379* In the following overview of the steps performed, M in the
380* margin indicates message traffic and C indicates O(n^2 nb/sqrt(p))
381* or more flops per processor.
382*
383* Inner loop:
384* A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) )
385*M h = house( A(index:n, index) )
386*M Spread v, h across
387*M vt = v^T; ht = h^T
388* A( index+1:n, index+1:maxindex ) -=
389* ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) )
390*C v = tril(A) * h; vt = ht * tril(A,-1)
391*MorC v = v - H*V*h - V*H*h
392*M v = v + vt^T
393*M c = v' * h
394* v = tau * ( v - c * tau' * h / 2 )
395*C A = A - H*V - V*H
396*
397*
398*
399* =================================================================
400*
401* .. Parameters ..
402 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
403 $ MB_, NB_, RSRC_, CSRC_, LLD_
404 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
405 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
406 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
407 DOUBLE PRECISION ONE
408 parameter( one = 1.0d0 )
409 COMPLEX*16 Z_ONE, Z_NEGONE, Z_ZERO
410 parameter( z_one = 1.0d0, z_negone = -1.0d0,
411 $ z_zero = 0.0d0 )
412 DOUBLE PRECISION ZERO
413 parameter( zero = 0.0d+0 )
414* ..
415*
416*
417* .. Local Scalars ..
418*
419*
420 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
421 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
422 $ INDEXA, INDEXINH, INDEXINV, INH, INHB, INHT,
423 $ INHTB, INTMP, INV, INVB, INVT, INVTB, J, LDA,
424 $ LDV, LDZG, LII, LIIB, LIIP1, LIJ, LIJB, LIJP1,
425 $ LTLIP1, LTNM1, LWMIN, MAXINDEX, MININDEX,
426 $ MYCOL, MYFIRSTROW, MYROW, MYSETNUM, NBZG, NP,
427 $ NPB, NPCOL, NPM0, NPM1, NPROW, NPS, NPSET, NQ,
428 $ NQB, NQM1, NUMROWS, NXTCOL, NXTROW, PBMAX,
429 $ PBMIN, PBSIZE, PNB, ROWSPERPROC
430 DOUBLE PRECISION NORM, SAFMAX, SAFMIN
431 COMPLEX*16 ALPHA, BETA, C, CONJTOPH, CONJTOPV,
432 $ ONEOVERBETA, TOPH, TOPNV, TOPTAU, TOPV
433* ..
434* .. Local Arrays ..
435*
436*
437*
438*
439 INTEGER IDUM1( 1 ), IDUM2( 1 )
440 DOUBLE PRECISION DTMP( 5 )
441 COMPLEX*16 CC( 3 )
442* ..
443* .. External Subroutines ..
444 EXTERNAL blacs_gridinfo, chk1mat, dcombnrm2, dgebr2d,
445 $ dgebs2d, dgsum2d, pchk1mat, pdtreecomb,
446 $ pxerbla, zgebr2d, zgebs2d, zgemm, zgemv,
447 $ zgerv2d, zgesd2d, zgsum2d, zlamov, zscal,
448 $ ztrmvt
449* ..
450* .. External Functions ..
451*
452 LOGICAL LSAME
453 INTEGER ICEIL, NUMROC, PJLAENV
454 DOUBLE PRECISION DZNRM2, PDLAMCH
455 EXTERNAL lsame, iceil, numroc, pjlaenv, dznrm2, pdlamch
456* ..
457* .. Intrinsic Functions ..
458 INTRINSIC dble, dcmplx, dconjg, dimag, ichar, max, min,
459 $ mod, sign, sqrt
460* ..
461*
462*
463* .. Executable Statements ..
464* This is just to keep ftnchek and toolpack/1 happy
465 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
466 $ rsrc_.LT.0 )RETURN
467*
468*
469*
470* Further details
471* ===============
472*
473* At the top of the loop, v and nh have been computed but not
474* spread across. Hence, A is out-of-date even after the
475* rank 2k update. Furthermore, we compute the next v before
476* nh is spread across.
477*
478* I claim that if we used a sum-to-all on NV, by summing CC within
479* each column, that we could compute NV locally and could avoid
480* spreading V across. Bruce claims that sum-to-all can be made
481* to cost no more than sum-to-one on the Paragon. If that is
482* true, this would be a win. But,
483* the BLACS sum-to-all is just a sum-to-one followed by a broadcast,
484* and hence the present scheme is better for now.
485*
486* Get grid parameters
487*
488 ictxt = desca( ctxt_ )
489 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
490*
491 safmax = sqrt( pdlamch( ictxt, 'O' ) ) / n
492 safmin = sqrt( pdlamch( ictxt, 'S' ) )
493*
494* Test the input parameters
495*
496 info = 0
497 IF( nprow.EQ.-1 ) THEN
498 info = -( 600+ctxt_ )
499 ELSE
500*
501* Here we set execution options for PZHETTRD
502*
503 pnb = pjlaenv( ictxt, 2, 'PZHETTRD', 'L', 0, 0, 0, 0 )
504 anb = pjlaenv( ictxt, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
505*
506 interleave = ( pjlaenv( ictxt, 4, 'PZHETTRD', 'L', 1, 0, 0,
507 $ 0 ).EQ.1 )
508 twogemms = ( pjlaenv( ictxt, 4, 'PZHETTRD', 'L', 2, 0, 0,
509 $ 0 ).EQ.1 )
510 balanced = ( pjlaenv( ictxt, 4, 'PZHETTRD', 'L', 3, 0, 0,
511 $ 0 ).EQ.1 )
512*
513 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
514*
515*
516 upper = lsame( uplo, 'U' )
517 IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
518 $ info = 600 + nb_
519 IF( info.EQ.0 ) THEN
520*
521*
522* Here is the arithmetic:
523* Let maxnpq = max( np, nq, 2 * ANB )
524* LDV = 4 * max( np, nq ) + 2
525* LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB )
526* = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS
527*
528* This overestimates memory requirements when ANB > NP/2
529* Memory requirements are lower when interleave = .false.
530* Hence, we could have two sets of memory requirements,
531* one for interleave and one for non-interleave.
532*
533*
534 nps = max( numroc( n, 1, 0, 0, nprow ), 2*anb )
535 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
536*
537 work( 1 ) = dcmplx( lwmin )
538 IF( .NOT.lsame( uplo, 'L' ) ) THEN
539 info = -1
540 ELSE IF( ia.NE.1 ) THEN
541 info = -4
542 ELSE IF( ja.NE.1 ) THEN
543 info = -5
544 ELSE IF( nprow.NE.npcol ) THEN
545 info = -( 600+ctxt_ )
546 ELSE IF( desca( dtype_ ).NE.1 ) THEN
547 info = -( 600+dtype_ )
548 ELSE IF( desca( mb_ ).NE.1 ) THEN
549 info = -( 600+mb_ )
550 ELSE IF( desca( nb_ ).NE.1 ) THEN
551 info = -( 600+nb_ )
552 ELSE IF( desca( rsrc_ ).NE.0 ) THEN
553 info = -( 600+rsrc_ )
554 ELSE IF( desca( csrc_ ).NE.0 ) THEN
555 info = -( 600+csrc_ )
556 ELSE IF( lwork.LT.lwmin ) THEN
557 info = -11
558 END IF
559 END IF
560 IF( upper ) THEN
561 idum1( 1 ) = ichar( 'U' )
562 ELSE
563 idum1( 1 ) = ichar( 'l' )
564 END IF
565 IDUM2( 1 ) = 1
566*
567 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2,
568 $ INFO )
569 END IF
570*
571 IF( INFO.NE.0 ) THEN
572 CALL PXERBLA( ICTXT, 'pzhettrd', -INFO )
573 RETURN
574 END IF
575*
576* Quick return if possible
577*
578 IF( N.EQ.0 )
579 $ RETURN
580*
581*
582*
583* Reduce the lower triangle of sub( A )
584 NP = NUMROC( N, 1, MYROW, 0, NPROW )
585 NQ = NUMROC( N, 1, MYCOL, 0, NPCOL )
586*
587 NXTROW = 0
588 NXTCOL = 0
589*
590 LIIP1 = 1
591 LIJP1 = 1
592 NPM1 = NP
593 NQM1 = NQ
594*
595 LDA = DESCA( LLD_ )
596 ICTXT = DESCA( CTXT_ )
597*
598*
599*
600* Miscellaneous details:
601* Put tau, D and E in the right places
602* Check signs
603* Place all the arrays in WORK, control their placement
604* in memory.
605*
606*
607*
608* Loop invariants
609* A(LIIP1, LIJ) points to the first element of A(I+1,J)
610* NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N )
611* A(LII:N,LIJ:N) is one step out of date.
612* proc( CURROW, CURCOL ) owns A(LII,LIJ)
613* proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ)
614*
615 INH = 1
616*
617 IF( INTERLEAVE ) THEN
618*
619* H and V are interleaved to minimize memory movement
620* LDV has to be twice as large to accommodate interleaving.
621* In addition, LDV is doubled again to allow v, h and
622* toptau to be spread across and transposed in a
623* single communication operation with minimum memory
624* movement.
625*
626* We could reduce LDV back to 2*MAX(NPM1,NQM1)
627* by increasing the memory movement required in
628* the spread and transpose of v, h and toptau.
629* However, since the non-interleaved path already
630* provides a near minimum memory requirement option,
631* we did not provide this additional path.
632*
633 LDV = 4*( MAX( NPM1, NQM1 ) ) + 2
634*
635 INH = 1
636*
637 INV = INH + LDV / 2
638 INVT = INH + ( ANB+1 )*LDV
639*
640 INHT = INVT + LDV / 2
641 INTMP = INVT + LDV*( ANB+1 )
642*
643 ELSE
644 LDV = MAX( NPM1, NQM1 )
645*
646 INHT = INH + LDV*( ANB+1 )
647 INV = INHT + LDV*( ANB+1 )
648*
649* The code works without this +1, but only because of a
650* coincidence. Without the +1, WORK(INVT) gets trashed, but
651* WORK(INVT) is only used once and when it is used, it is
652* multiplied by WORK( INH ) which is zero. Hence, the fact
653* that WORK(INVT) is trashed has no effect.
654*
655 INVT = INV + LDV*( ANB+1 ) + 1
656 INTMP = INVT + LDV*( 2*ANB )
657*
658 END IF
659*
660 IF( INFO.NE.0 ) THEN
661 CALL PXERBLA( ICTXT, 'pzhettrd', -INFO )
662 WORK( 1 ) = DCMPLX( LWMIN )
663 RETURN
664 END IF
665*
666*
667* The code below satisfies the loop invariant: trueA = A - V * HT - H * VT,
668* (where V, H, VT and HT all have BINDEX+1 rows/columns)
669* the first ANB times through the loop.
670*
671*
672*
673* Setting either ( InH and InHT ) or InV to Z_ZERO
674* is adequate except in the face of NaNs.
675*
676*
677 DO 10 I = 1, NP
678 WORK( INH+I-1 ) = Z_ZERO
679 WORK( INV+I-1 ) = Z_ZERO
680 10 CONTINUE
681 DO 20 I = 1, NQ
682 WORK( INHT+I-1 ) = Z_ZERO
683 20 CONTINUE
684*
685*
686*
687 TOPNV = Z_ZERO
688*
689 LTLIP1 = LIJP1
690 LTNM1 = NPM1
691 IF( MYCOL.GT.MYROW ) THEN
692 LTLIP1 = LTLIP1 + 1
693 LTNM1 = LTNM1 - 1
694 END IF
695*
696*
697 DO 210 MININDEX = 1, N - 1, ANB
698*
699*
700 MAXINDEX = MIN( MININDEX+ANB-1, N )
701 LIJB = NUMROC( MAXINDEX, 1, MYCOL, 0, NPCOL ) + 1
702 LIIB = NUMROC( MAXINDEX, 1, MYROW, 0, NPROW ) + 1
703*
704 NQB = NQ - LIJB + 1
705 NPB = NP - LIIB + 1
706 INHTB = INHT + LIJB - 1
707 INVTB = INVT + LIJB - 1
708 INHB = INH + LIIB - 1
709 INVB = INV + LIIB - 1
710*
711*
712*
713*
714 DO 160 INDEX = MININDEX, MIN( MAXINDEX, N-1 )
715*
716 BINDEX = INDEX - MININDEX
717*
718 CURROW = NXTROW
719 CURCOL = NXTCOL
720*
721 NXTROW = MOD( CURROW+1, NPROW )
722 NXTCOL = MOD( CURCOL+1, NPCOL )
723*
724 LII = LIIP1
725 LIJ = LIJP1
726 NPM0 = NPM1
727*
728 IF( MYROW.EQ.CURROW ) THEN
729 NPM1 = NPM1 - 1
730 LIIP1 = LIIP1 + 1
731 END IF
732 IF( MYCOL.EQ.CURCOL ) THEN
733 NQM1 = NQM1 - 1
734 LIJP1 = LIJP1 + 1
735 LTLIP1 = LTLIP1 + 1
736 LTNM1 = LTNM1 - 1
737 END IF
738*
739*
740*
741*
742* V = NV, VT = NVT, H = NH, HT = NHT
743*
744*
745* Update the current column of A
746*
747*
748 IF( MYCOL.EQ.CURCOL ) THEN
749*
750 INDEXA = LII + ( LIJ-1 )*LDA
751 INDEXINV = INV + LII - 1 + ( BINDEX-1 )*LDV
752 INDEXINH = INH + LII - 1 + ( BINDEX-1 )*LDV
753 CONJTOPH = DCONJG( WORK( INHT+LIJ-1+BINDEX*LDV ) )
754 CONJTOPV = DCONJG( TOPNV )
755*
756 IF( INDEX.GT.1 ) THEN
757 DO 30 I = 0, NPM0 - 1
758* A( INDEXA+I ) = A( INDEXA+I )
759 A( INDEXA+I ) = A( INDEXA+I ) -
760 $ WORK( INDEXINV+LDV+I )*CONJTOPH -
761 $ WORK( INDEXINH+LDV+I )*CONJTOPV
762 30 CONTINUE
763 END IF
764*
765*
766 END IF
767*
768*
769 IF( MYCOL.EQ.CURCOL ) THEN
770*
771* Compute the householder vector
772*
773 IF( MYROW.EQ.CURROW ) THEN
774 DTMP( 2 ) = DBLE( A( LII+( LIJ-1 )*LDA ) )
775 ELSE
776 DTMP( 2 ) = ZERO
777 END IF
778 IF( MYROW.EQ.NXTROW ) THEN
779 DTMP( 3 ) = DBLE( A( LIIP1+( LIJ-1 )*LDA ) )
780 DTMP( 4 ) = DIMAG( A( LIIP1+( LIJ-1 )*LDA ) )
781 ELSE
782 DTMP( 3 ) = ZERO
783 DTMP( 4 ) = ZERO
784 END IF
785*
786 NORM = DZNRM2( NPM1, A( LIIP1+( LIJ-1 )*LDA ), 1 )
787 DTMP( 1 ) = NORM
788*
789* IF DTMP(5) = 1.0, NORM is too large and might cause
790* overflow, hence PDTREECOMB must be called. IF DTMP(5)
791* is zero on output, DTMP(1) can be trusted.
792*
793 DTMP( 5 ) = ZERO
794 IF( DTMP( 1 ).GE.SAFMAX .OR. DTMP( 1 ).LT.SAFMIN ) THEN
795 DTMP( 5 ) = ONE
796 DTMP( 1 ) = ZERO
797 END IF
798*
799 DTMP( 1 ) = DTMP( 1 )*DTMP( 1 )
800 CALL DGSUM2D( ICTXT, 'c', ' ', 5, 1, DTMP, 5, -1,
801 $ CURCOL )
802 IF( DTMP( 5 ).EQ.ZERO ) THEN
803 DTMP( 1 ) = SQRT( DTMP( 1 ) )
804 ELSE
805 DTMP( 1 ) = NORM
806 CALL PDTREECOMB( ICTXT, 'c', 1, DTMP, -1, MYCOL,
807 $ DCOMBNRM2 )
808 END IF
809*
810 NORM = DTMP( 1 )
811*
812 D( LIJ ) = DTMP( 2 )
813 IF( MYROW.EQ.CURROW .AND. MYCOL.EQ.CURCOL ) THEN
814 A( LII+( LIJ-1 )*LDA ) = DCMPLX( D( LIJ ), ZERO )
815 END IF
816*
817*
818 ALPHA = DCMPLX( DTMP( 3 ), DTMP( 4 ) )
819*
820 NORM = SIGN( NORM, DBLE( ALPHA ) )
821*
822 IF( NORM.EQ.ZERO ) THEN
823 TOPTAU = ZERO
824 ELSE
825 BETA = NORM + ALPHA
826 TOPTAU = BETA / NORM
827 ONEOVERBETA = 1.0D0 / BETA
828*
829 CALL ZSCAL( NPM1, ONEOVERBETA,
830 $ A( LIIP1+( LIJ-1 )*LDA ), 1 )
831 END IF
832*
833 IF( MYROW.EQ.NXTROW ) THEN
834 A( LIIP1+( LIJ-1 )*LDA ) = Z_ONE
835 END IF
836*
837 TAU( LIJ ) = TOPTAU
838 E( LIJ ) = -NORM
839*
840 END IF
841*
842*
843* Spread v, nh, toptau across
844*
845 DO 40 I = 0, NPM1 - 1
846 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+I ) = A( LIIP1+I+
847 $ ( LIJ-1 )*LDA )
848 40 CONTINUE
849*
850 IF( MYCOL.EQ.CURCOL ) THEN
851 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) = TOPTAU
852 CALL ZGEBS2D( ICTXT, 'r', ' ', NPM1+NPM1+1, 1,
853 $ WORK( INV+LIIP1-1+BINDEX*LDV ),
854 $ NPM1+NPM1+1 )
855 ELSE
856 CALL ZGEBR2D( ICTXT, 'r', ' ', NPM1+NPM1+1, 1,
857 $ WORK( INV+LIIP1-1+BINDEX*LDV ),
858 $ NPM1+NPM1+1, MYROW, CURCOL )
859 TOPTAU = WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 )
860 END IF
861 DO 50 I = 0, NPM1 - 1
862 WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1-
863 $ 1+BINDEX*LDV+NPM1+I )
864 50 CONTINUE
865*
866 IF( INDEX.LT.N ) THEN
867 IF( MYROW.EQ.NXTROW .AND. MYCOL.EQ.CURCOL )
868 $ A( LIIP1+( LIJ-1 )*LDA ) = E( LIJ )
869 END IF
870*
871* Transpose v, nh
872*
873*
874 IF( MYROW.EQ.MYCOL ) THEN
875 DO 60 I = 0, NPM1 + NPM1
876 WORK( INVT+LIJP1-1+BINDEX*LDV+I ) = WORK( INV+LIIP1-1+
877 $ BINDEX*LDV+I )
878 60 CONTINUE
879 ELSE
880 CALL ZGESD2D( ICTXT, NPM1+NPM1, 1,
881 $ WORK( INV+LIIP1-1+BINDEX*LDV ), NPM1+NPM1,
882 $ MYCOL, MYROW )
883 CALL ZGERV2D( ICTXT, NQM1+NQM1, 1,
884 $ WORK( INVT+LIJP1-1+BINDEX*LDV ), NQM1+NQM1,
885 $ MYCOL, MYROW )
886 END IF
887*
888 DO 70 I = 0, NQM1 - 1
889 WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV+I ) = WORK( INVT+
890 $ LIJP1-1+BINDEX*LDV+NQM1+I )
891 70 CONTINUE
892*
893*
894* Update the current block column of A
895*
896 IF( INDEX.GT.1 ) THEN
897 DO 90 J = LIJP1, LIJB - 1
898 DO 80 I = 0, NPM1 - 1
899*
900 A( LIIP1+I+( J-1 )*LDA ) = A( LIIP1+I+( J-1 )*LDA )
901 $ - WORK( INV+LIIP1-1+BINDEX*LDV+I )*
902 $ DCONJG( WORK( INHT+J-1+BINDEX*LDV ) ) -
903 $ WORK( INH+LIIP1-1+BINDEX*LDV+I )*
904 $ DCONJG( WORK( INVT+J-1+BINDEX*LDV ) )
905 80 CONTINUE
906 90 CONTINUE
907 END IF
908*
909*
910*
911* Compute NV = A * NHT; NVT = A * NH
912*
913* These two lines are necessary because these elements
914* are not always involved in the calls to ZTRMVT
915* for two reasons:
916* 1) On diagonal processors, the call to TRMVT
917* involves only LTNM1-1 elements
918* 2) On some processes, NQM1 < LTNM1 or LIIP1 < LTLIP1
919* and when the results are combined across all processes,
920* uninitialized values may be included.
921 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) = Z_ZERO
922 WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+NQM1-1 ) = Z_ZERO
923*
924*
925 IF( MYROW.EQ.MYCOL ) THEN
926 IF( LTNM1.GT.1 ) THEN
927 CALL ZTRMVT( 'l', LTNM1-1,
928 $ A( LTLIP1+1+( LIJP1-1 )*LDA ), LDA,
929 $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), 1,
930 $ WORK( INH+LTLIP1+1-1+( BINDEX+1 )*LDV ),
931 $ 1, WORK( INV+LTLIP1+1-1+( BINDEX+1 )*
932 $ LDV ), 1, WORK( INHT+LIJP1-1+( BINDEX+
933 $ 1 )*LDV ), 1 )
934 END IF
935 DO 100 I = 1, LTNM1
936 WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV )
937 $ = WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) +
938 $ A( LTLIP1+I-1+( LIJP1+I-1-1 )*LDA )*
939 $ WORK( INH+LTLIP1+I-1-1+( BINDEX+1 )*LDV )
940 100 CONTINUE
941 ELSE
942 IF( LTNM1.GT.0 )
943 $ CALL ZTRMVT( 'l', LTNM1, A( LTLIP1+( LIJP1-1 )*LDA ),
944 $ LDA, WORK( INVT+LIJP1-1+( BINDEX+1 )*
945 $ LDV ), 1, WORK( INH+LTLIP1-1+( BINDEX+
946 $ 1 )*LDV ), 1, WORK( INV+LTLIP1-1+
947 $ ( BINDEX+1 )*LDV ), 1,
948 $ WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV ),
949 $ 1 )
950*
951 END IF
952*
953*
954* We take advantage of the fact that:
955* A * sum( B ) = sum ( A * B ) for matrices A,B
956*
957* trueA = A + V * HT + H * VT
958* hence: (trueA)v = Av' + V * HT * v + H * VT * v
959* VT * v = sum_p_in_NPROW ( VTp * v )
960* H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v )
961*
962* v = v + V * HT * h + H * VT * h
963*
964*
965*
966* tmp = HT * nh1
967 DO 110 I = 1, 2*( BINDEX+1 )
968 WORK( INTMP-1+I ) = 0
969 110 CONTINUE
970*
971 IF( BALANCED ) THEN
972 NPSET = NPROW
973 MYSETNUM = MYROW
974 ROWSPERPROC = ICEIL( NQB, NPSET )
975 MYFIRSTROW = MIN( NQB+1, 1+ROWSPERPROC*MYSETNUM )
976 NUMROWS = MIN( ROWSPERPROC, NQB-MYFIRSTROW+1 )
977*
978*
979* tmp = HT * v
980*
981 CALL ZGEMV( 'c', NUMROWS, BINDEX+1, Z_ONE,
982 $ WORK( INHTB+MYFIRSTROW-1 ), LDV,
983 $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ),
984 $ 1, Z_ZERO, WORK( INTMP ), 1 )
985* tmp2 = VT * v
986 CALL ZGEMV( 'c', NUMROWS, BINDEX+1, Z_ONE,
987 $ WORK( INVTB+MYFIRSTROW-1 ), LDV,
988 $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ),
989 $ 1, Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 )
990*
991*
992 CALL ZGSUM2D( ICTXT, 'c', ' ', 2*( BINDEX+1 ), 1,
993 $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 )
994 ELSE
995* tmp = HT * v
996*
997 CALL ZGEMV( 'c', NQB, BINDEX+1, Z_ONE, WORK( INHTB ),
998 $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1,
999 $ Z_ZERO, WORK( INTMP ), 1 )
1000* tmp2 = VT * v
1001 CALL ZGEMV( 'c', NQB, BINDEX+1, Z_ONE, WORK( INVTB ),
1002 $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1,
1003 $ Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 )
1004*
1005 END IF
1006*
1007*
1008*
1009 IF( BALANCED ) THEN
1010 MYSETNUM = MYCOL
1011*
1012 ROWSPERPROC = ICEIL( NPB, NPSET )
1013 MYFIRSTROW = MIN( NPB+1, 1+ROWSPERPROC*MYSETNUM )
1014 NUMROWS = MIN( ROWSPERPROC, NPB-MYFIRSTROW+1 )
1015*
1016 CALL ZGSUM2D( ICTXT, 'r', ' ', 2*( BINDEX+1 ), 1,
1017 $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 )
1018*
1019*
1020* v = v + V * tmp
1021 IF( INDEX.GT.1 ) THEN
1022 CALL ZGEMV( 'n', NUMROWS, BINDEX+1, Z_NEGONE,
1023 $ WORK( INVB+MYFIRSTROW-1 ), LDV,
1024 $ WORK( INTMP ), 1, Z_ONE,
1025 $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )*
1026 $ LDV ), 1 )
1027*
1028* v = v + H * tmp2
1029 CALL ZGEMV( 'n', NUMROWS, BINDEX+1, Z_NEGONE,
1030 $ WORK( INHB+MYFIRSTROW-1 ), LDV,
1031 $ WORK( INTMP+BINDEX+1 ), 1, Z_ONE,
1032 $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )*
1033 $ LDV ), 1 )
1034 END IF
1035*
1036 ELSE
1037* v = v + V * tmp
1038 CALL ZGEMV( 'n', NPB, BINDEX+1, Z_NEGONE, WORK( INVB ),
1039 $ LDV, WORK( INTMP ), 1, Z_ONE,
1040 $ WORK( INVB+( BINDEX+1 )*LDV ), 1 )
1041*
1042*
1043* v = v + H * tmp2
1044 CALL ZGEMV( 'n', NPB, BINDEX+1, Z_NEGONE, WORK( INHB ),
1045 $ LDV, WORK( INTMP+BINDEX+1 ), 1, Z_ONE,
1046 $ WORK( INVB+( BINDEX+1 )*LDV ), 1 )
1047*
1048 END IF
1049*
1050*
1051* Transpose NV and add it back into NVT
1052*
1053 IF( MYROW.EQ.MYCOL ) THEN
1054 DO 120 I = 0, NQM1 - 1
1055 WORK( INTMP+I ) = WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+
1056 $ I )
1057 120 CONTINUE
1058 ELSE
1059 CALL ZGESD2D( ICTXT, NQM1, 1,
1060 $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ),
1061 $ NQM1, MYCOL, MYROW )
1062 CALL ZGERV2D( ICTXT, NPM1, 1, WORK( INTMP ), NPM1, MYCOL,
1063 $ MYROW )
1064*
1065 END IF
1066 DO 130 I = 0, NPM1 - 1
1067 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1-
1068 $ 1+( BINDEX+1 )*LDV+I ) + WORK( INTMP+I )
1069 130 CONTINUE
1070*
1071* Sum-to-one NV rowwise (within a row)
1072*
1073 CALL ZGSUM2D( ICTXT, 'r', ' ', NPM1, 1,
1074 $ WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ), NPM1,
1075 $ MYROW, NXTCOL )
1076*
1077*
1078* Dot product c = NV * NH
1079* Sum-to-all c within next processor column
1080*
1081*
1082 IF( MYCOL.EQ.NXTCOL ) THEN
1083 CC( 1 ) = Z_ZERO
1084 DO 140 I = 0, NPM1 - 1
1085 CC( 1 ) = CC( 1 ) + DCONJG( WORK( INV+LIIP1-1+
1086 $ ( BINDEX+1 )*LDV+I ) )*
1087 $ WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I )
1088 140 CONTINUE
1089 IF( MYROW.EQ.NXTROW ) THEN
1090 CC( 2 ) = WORK( INV+LIIP1-1+( BINDEX+1 )*LDV )
1091 CC( 3 ) = WORK( INH+LIIP1-1+( BINDEX+1 )*LDV )
1092 ELSE
1093 CC( 2 ) = Z_ZERO
1094 CC( 3 ) = Z_ZERO
1095 END IF
1096 CALL ZGSUM2D( ICTXT, 'c', ' ', 3, 1, CC, 3, -1, NXTCOL )
1097*
1098 TOPV = CC( 2 )
1099 C = CC( 1 )
1100 TOPH = CC( 3 )
1101*
1102 TOPNV = TOPTAU*( TOPV-C*DCONJG( TOPTAU ) / 2*TOPH )
1103*
1104*
1105* Compute V = Tau * (V - C * Tau' / 2 * H )
1106*
1107*
1108 DO 150 I = 0, NPM1 - 1
1109 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = TOPTAU*
1110 $ ( WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I )-C*
1111 $ DCONJG( TOPTAU ) / 2*WORK( INH+LIIP1-1+( BINDEX+
1112 $ 1 )*LDV+I ) )
1113 150 CONTINUE
1114*
1115 END IF
1116*
1117*
1118 160 CONTINUE
1119*
1120*
1121* Perform the rank2k update
1122*
1123 IF( MAXINDEX.LT.N ) THEN
1124*
1125 DO 170 I = 0, NPM1 - 1
1126 WORK( INTMP+I ) = WORK( INH+LIIP1-1+ANB*LDV+I )
1127 170 CONTINUE
1128*
1129*
1130*
1131 IF( .NOT.TWOGEMMS ) THEN
1132 IF( INTERLEAVE ) THEN
1133 LDZG = LDV / 2
1134 ELSE
1135 CALL ZLAMOV( 'a', LTNM1, ANB, WORK( INHT+LIJP1-1 ),
1136 $ LDV, WORK( INVT+LIJP1-1+ANB*LDV ), LDV )
1137*
1138 CALL ZLAMOV( 'a', LTNM1, ANB, WORK( INV+LTLIP1-1 ),
1139 $ LDV, WORK( INH+LTLIP1-1+ANB*LDV ), LDV )
1140 LDZG = LDV
1141 END IF
1142 NBZG = ANB*2
1143 ELSE
1144 LDZG = LDV
1145 NBZG = ANB
1146 END IF
1147*
1148*
1149 DO 180 PBMIN = 1, LTNM1, PNB
1150*
1151 PBSIZE = MIN( PNB, LTNM1-PBMIN+1 )
1152 PBMAX = MIN( LTNM1, PBMIN+PNB-1 )
1153 CALL ZGEMM( 'n', 'c', PBSIZE, PBMAX, NBZG, Z_NEGONE,
1154 $ WORK( INH+LTLIP1-1+PBMIN-1 ), LDZG,
1155 $ WORK( INVT+LIJP1-1 ), LDZG, Z_ONE,
1156 $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA )
1157 IF( TWOGEMMS ) THEN
1158 CALL ZGEMM( 'n', 'c', PBSIZE, PBMAX, ANB, Z_NEGONE,
1159 $ WORK( INV+LTLIP1-1+PBMIN-1 ), LDZG,
1160 $ WORK( INHT+LIJP1-1 ), LDZG, Z_ONE,
1161 $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA )
1162 END IF
1163 180 CONTINUE
1164*
1165*
1166*
1167 DO 190 I = 0, NPM1 - 1
1168 WORK( INV+LIIP1-1+I ) = WORK( INV+LIIP1-1+ANB*LDV+I )
1169 WORK( INH+LIIP1-1+I ) = WORK( INTMP+I )
1170 190 CONTINUE
1171 DO 200 I = 0, NQM1 - 1
1172 WORK( INHT+LIJP1-1+I ) = WORK( INHT+LIJP1-1+ANB*LDV+I )
1173 200 CONTINUE
1174*
1175*
1176 END IF
1177*
1178* End of the update A code
1179*
1180 210 CONTINUE
1181*
1182 IF( MYCOL.EQ.NXTCOL ) THEN
1183 IF( MYROW.EQ.NXTROW ) THEN
1184*
1185 D( NQ ) = DBLE( A( NP+( NQ-1 )*LDA ) )
1186 A( NP+( NQ-1 )*LDA ) = D( NQ )
1187*
1188 CALL DGEBS2D( ICTXT, 'c', ' ', 1, 1, D( NQ ), 1 )
1189 ELSE
1190 CALL DGEBR2D( ICTXT, 'c', ' ', 1, 1, D( NQ ), 1, NXTROW,
1191 $ NXTCOL )
1192 END IF
1193 END IF
1194*
1195*
1196*
1197*
1198 WORK( 1 ) = DCMPLX( LWMIN )
1199 RETURN
1200*
1201* End of PZHETTRD
1202*
1203*