1 COMPLEX*16 FUNCTION pzlatra( N, A, IA, JA, DESCA )
3* -- scalapack auxiliary routine(version 1.7) --
4* university of tennessee, knoxville, oak ridge national laboratory,
5* and university of california, berkeley.
8* .. scalar arguments ..
11* .. array arguments ..
19*
pzlatra computes
the trace of an n-by-n distributed matrix sub( a )
20* denoting a( ia:ia+n-1, ja:ja+n-1 ).
the result is left on every
26* each global
data object is described by an
associated description
27* vector. this vector stores
the information required to establish
28*
the mapping between an object element and its corresponding process
31* let a be a generic term
for any 2d block cyclicly distributed array.
32* such a global array has an
associated description vector desca.
33* in
the following comments,
the character _ should be read as
34*
"of the global array".
36* notation stored in explanation
37* --------------- -------------- --------------------------------------
38* dtype_a(global) desca( dtype_ )
the descriptor type. in this
case,
40* ctxt_a(global) desca( ctxt_ )
the blacs context handle, indicating
41*
the blacs process grid a is distribu-
42* ted over.
the context itself is glo-
43* bal, but
the handle(
the integer
45* m_a(global) desca( m_ )
the number of rows in
the global
47* n_a(global) desca( n_ )
the number of columns in
the global
49* mb_a(global) desca( mb_ )
the blocking factor used to distribute
51* nb_a(global) desca( nb_ )
the blocking factor used to distribute
53* rsrc_a(global) desca( rsrc_ )
the process row over which
the first
54* row of
the array a is distributed.
55* csrc_a(global) desca( csrc_ )
the process column over which
the
56* first column of
the array a is
58* lld_a(local) desca( lld_ )
the leading dimension of
the local
59* array. lld_a >=
max(1,locr(m_a)).
61* let k be
the number of rows or columns of a distributed matrix,
62* and assume that its process grid has dimension p x q.
63* locr( k ) denotes
the number of elements of k that a process
64* would receive
if k were distributed over
the p processes of its
66* similarly, locc( k ) denotes
the number of elements of k that a
67* process would receive
if k were distributed over
the q processes of
69*
the values of locr() and locc() may be determined via a
call to
the
70* scalapack tool function,
numroc:
71* locr( m ) =
numroc( m, mb_a, myrow, rsrc_a, nprow ),
72* locc( n ) =
numroc( n, nb_a, mycol, csrc_a, npcol ).
73* an upper bound
for these quantities may be computed by:
74* locr( m ) <= ceil( ceil(m/mb_a)/nprow )*mb_a
75* locc( n ) <= ceil( ceil(n/nb_a)/npcol )*nb_a
80* n(global input) integer
81*
the number of rows and columns to be operated on i.e
the
82* order of
the distributed submatrix sub( a ). n >= 0.
84* a(local input)
COMPLEX*16 pointer into
the local memory
85* to an array of
dimension ( LLD_A, LOCc(JA+N-1) ). this array
86*
contains the local pieces of
the distributed matrix
the trace
89* ia(global input) integer
90*
the row index in
the global array a indicating
the first
93* ja(global input) integer
94*
the column index in
the global array a indicating
the
95* first column of sub( a ).
97* desca(global and local input)
INTEGER array of dimension dlen_.
98*
the array descriptor
for the distributed matrix a.
100* ====================================================================
103 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
104 $ lld_, mb_, m_, nb_, n_, rsrc_
105 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
106 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
107 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
109 parameter( zero = 0.0d+0 )
112 INTEGER icurcol, icurrow, ii, ioffa, j, jb, jj, jn,
113 $ lda, ll, mycol, myrow, npcol, nprow
116* ..
External subroutines ..
119* ..
External functions ..
123* ..
Intrinsic functions ..
126* .. executable statements ..
138 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, ii, jj,
141 jn =
min(
iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
144 ioffa = ii + ( jj - 1 ) * lda
146* handle first diagonal block separately
148 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol )
THEN
149 DO 10 ll = ioffa, ioffa + (jb-1)*(lda+1), lda+1
150 trace = trace + a( ll )
153 IF( myrow.EQ.icurrow )
155 IF( mycol.EQ.icurcol )
156 $ ioffa = ioffa + jb*lda
157 icurrow = mod( icurrow+1, nprow )
158 icurcol = mod( icurcol+1, npcol )
160* loop over
the remaining block of columns
162 DO 30 j = jn+1, ja+n-1, desca( nb_ )
163 jb =
min( ja+n-j, desca( nb_ ) )
165 IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol )
THEN
166 DO 20 ll = ioffa, ioffa + (jb-1)*(lda+1), lda+1
167 trace = trace + a( ll )
170 IF( myrow.EQ.icurrow )
172 IF( mycol.EQ.icurcol )
173 $ ioffa = ioffa + jb*lda
174 icurrow = mod( icurrow+1, nprow )
175 icurcol = mod( icurcol+1, npcol )
178 CALL zgsum2d( desca( ctxt_ ),
'All',
' ', 1, 1, trace, 1, -1,