OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pcpotrrv.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pcpotrrv (uplo, n, a, ia, ja, desca, work)

Function/Subroutine Documentation

◆ pcpotrrv()

subroutine pcpotrrv ( character uplo,
integer n,
complex, dimension( * ) a,
integer ia,
integer ja,
integer, dimension( * ) desca,
complex, dimension( * ) work )

Definition at line 1 of file pcpotrrv.f.

2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 28, 2001
7*
8* .. Scalar Arguments ..
9 CHARACTER UPLO
10 INTEGER IA, JA, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 COMPLEX A( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PCPOTRRV recomputes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from L or U
21* computed by PCPOTRF. The routine performs the Cholesky factorization
22* in reverse.
23*
24* Notes
25* =====
26*
27* Each global data object is described by an associated description
28* vector. This vector stores the information required to establish
29* the mapping between an object element and its corresponding process
30* and memory location.
31*
32* Let A be a generic term for any 2D block cyclicly distributed array.
33* Such a global array has an associated description vector DESCA.
34* In the following comments, the character _ should be read as
35* "of the global array".
36*
37* NOTATION STORED IN EXPLANATION
38* --------------- -------------- --------------------------------------
39* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40* DTYPE_A = 1.
41* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42* the BLACS process grid A is distribu-
43* ted over. The context itself is glo-
44* bal, but the handle (the integer
45* value) may vary.
46* M_A (global) DESCA( M_ ) The number of rows in the global
47* array A.
48* N_A (global) DESCA( N_ ) The number of columns in the global
49* array A.
50* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51* the rows of the array.
52* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53* the columns of the array.
54* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55* row of the array A is distributed.
56* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57* first column of the array A is
58* distributed.
59* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60* array. LLD_A >= MAX(1,LOCr(M_A)).
61*
62* Let K be the number of rows or columns of a distributed matrix,
63* and assume that its process grid has dimension p x q.
64* LOCr( K ) denotes the number of elements of K that a process
65* would receive if K were distributed over the p processes of its
66* process column.
67* Similarly, LOCc( K ) denotes the number of elements of K that a
68* process would receive if K were distributed over the q processes of
69* its process row.
70* The values of LOCr() and LOCc() may be determined via a call to the
71* ScaLAPACK tool function, NUMROC:
72* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74* An upper bound for these quantities may be computed by:
75* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77*
78* Arguments
79* =========
80*
81* UPLO (global input) CHARACTER
82* Specifies whether the upper or lower triangular part of the
83* hermitian distributed matrix sub( A ) is stored:
84* stored:
85* = 'U': Upper triangular
86* = 'L': Lower triangular
87*
88* N (global input) INTEGER
89* The number of rows and columns to be operated on, i.e. the
90* order of the distributed submatrix sub( A ). N >= 0.
91*
92* A (local input/local output) COMPLEX pointer into the
93* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
94* On entry, the local pieces of the factors L or U of the
95* distributed matrix sub( A ) from the Cholesky factorization.
96* On exit, the original distributed matrix sub( A ) is
97* restored.
98*
99* IA (global input) INTEGER
100* The row index in the global array A indicating the first
101* row of sub( A ).
102*
103* JA (global input) INTEGER
104* The column index in the global array A indicating the
105* first column of sub( A ).
106*
107* DESCA (global and local input) INTEGER array of dimension DLEN_.
108* The array descriptor for the distributed matrix A.
109*
110* WORK (local workspace) COMPLEX array, dimension (LWORK)
111* LWORK >= MB_A*NB_A.
112*
113* =====================================================================
114*
115* .. Parameters ..
116 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
117 $ LLD_, MB_, M_, NB_, N_, RSRC_
118 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
119 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
120 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
121 REAL ONE
122 parameter( one = 1.0e+0 )
123 COMPLEX CONE, ZERO
124 parameter( cone = ( 1.0e+0, 0.0e+0 ),
125 $ zero = ( 0.0e+0, 0.0e+0 ) )
126* ..
127* .. Local Scalars ..
128 LOGICAL UPPER
129 CHARACTER COLBTOP, ROWBTOP
130 INTEGER IACOL, IAROW, ICTXT, IL, J, JB, JL, JN, MYCOL,
131 $ MYROW, NPCOL, NPROW
132* .. Local Arrays ..
133 INTEGER DESCW( DLEN_ )
134* ..
135* .. External Subroutines ..
137 $ pcherk, pctrmm, pb_topget, pb_topset
138* ..
139* .. External Functions ..
140 LOGICAL LSAME
141 INTEGER ICEIL, INDXG2P
142 EXTERNAL iceil, indxg2p, lsame
143* ..
144* .. Intrinsic Functions ..
145 INTRINSIC min, mod
146* ..
147* .. Executable Statements ..
148*
149* Get grid parameters
150*
151 ictxt = desca( ctxt_ )
152 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
153*
154 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
155 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
156*
157 upper = lsame( uplo, 'U' )
158 jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
159 jl = max( ( ( ja+n-2 ) / desca( nb_ ) ) * desca( nb_ ) + 1, ja )
160 il = max( ( ( ia+n-2 ) / desca( mb_ ) ) * desca( mb_ ) + 1, ia )
161 iarow = indxg2p( il, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
162 iacol = indxg2p( jl, desca( nb_ ), mycol, desca( csrc_ ), npcol )
163*
164* Define array descriptor for working array WORK
165*
166 CALL descset( descw, desca( mb_ ), desca( nb_ ), desca( mb_ ),
167 $ desca( nb_ ), iarow, iacol, ictxt, desca( mb_ ) )
168*
169 IF ( upper ) THEN
170*
171* Compute A from the Cholesky factor U : A = U'*U.
172*
173 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', ' ' )
174 CALL PB_TOPSET( ICTXT, 'broadcast', 'columnwise', 's-ring' )
175*
176 DO 10 J = JL, JN+1, -DESCA( NB_ )
177*
178 JB = MIN( JA+N-J, DESCA( NB_ ) )
179*
180* Update the trailing matrix, A = A + U'*U
181*
182 CALL PCHERK( 'upper', 'conjugate transpose', JA+N-J-JB, JB,
183 $ ONE, A, IL, J+JB, DESCA, ONE, A, IL+JB, J+JB,
184 $ DESCA )
185*
186* Copy current diagonal block of A into workspace
187*
188 CALL PCLACPY( 'all', JB, JB, A, IL, J, DESCA, WORK, 1, 1,
189 $ DESCW )
190*
191* Zero strict lower triangular part of diagonal block, to make
192* it U1.
193*
194 CALL PCLASET( 'lower', JB-1, JB, ZERO, ZERO, A, IL+1, J,
195 $ DESCA )
196*
197* Update the row panel U with the triangular matrix
198*
199 CALL PCTRMM( 'left', 'upper', 'conjugate transpose',
200 $ 'non-unit', JB, N-J+JA, CONE, WORK, 1, 1,
201 $ DESCW, A, IL, J, DESCA )
202*
203* Restore the strict lower triangular part of diagonal block.
204*
205 CALL PCLACPY( 'lower', JB-1, JB, WORK, 2, 1, DESCW, A,
206 $ IL+1, J, DESCA )
207*
208 IL = IL - DESCA( MB_ )
209 DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW )
210 DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL )
211*
212 10 CONTINUE
213*
214* Handle first block separately
215*
216 JB = MIN( JN-JA+1, DESCA( NB_ ) )
217*
218* Update the trailing matrix, A = A + U'*U
219*
220 CALL PCHERK( 'upper', 'conjugate transpose', N-JB, JB, ONE, A,
221 $ IA, JA+JB, DESCA, ONE, A, IA+JB, JA+JB, DESCA )
222*
223* Copy current diagonal block of A into workspace
224*
225 CALL PCLACPY( 'all', JB, JB, A, IA, JA, DESCA, WORK, 1, 1,
226 $ DESCW )
227*
228* Zero strict lower triangular part of diagonal block, to make
229* it U1.
230*
231 CALL PCLASET( 'lower', JB-1, JB, ZERO, ZERO, A, IA+1, JA,
232 $ DESCA )
233*
234* Update the row panel U with the triangular matrix
235*
236 CALL PCTRMM( 'left', 'upper', 'conjugate transpose', 'non-unit',
237 $ JB, N, CONE, WORK, 1, 1, DESCW, A, IA, JA, DESCA )
238*
239* Restore the strict lower triangular part of diagonal block.
240*
241 CALL PCLACPY( 'lower', JB-1, JB, WORK, 2, 1, DESCW, A, IA+1,
242 $ JA, DESCA )
243*
244 ELSE
245*
246* Compute A from the Cholesky factor L : A = L*L'.
247*
248 CALL PB_TOPSET( ICTXT, 'broadcast', 'rowwise', 's-ring' )
249 CALL PB_TOPSET( ICTXT, 'broadcast', 'columnwise', ' ' )
250*
251 DO 20 J = JL, JN+1, -DESCA( NB_ )
252*
253 JB = MIN( JA+N-J, DESCA( NB_ ) )
254*
255* Update the trailing matrix, A = A + L*L'
256*
257 CALL PCHERK( 'lower', 'no transpose', IA+N-IL-JB, JB, ONE, A,
258 $ IL+JB, J, DESCA, ONE, A, IL+JB, J+JB, DESCA )
259*
260* Copy current diagonal block of A into workspace
261*
262 CALL PCLACPY( 'all', JB, JB, A, IL, J, DESCA, WORK, 1, 1,
263 $ DESCW )
264*
265* Zero strict upper triangular part of diagonal block, to make
266* it L1.
267*
268 CALL PCLASET( 'upper', JB, JB-1, ZERO, ZERO, A, IL, J+1,
269 $ DESCA )
270*
271* Update the column panel L with the triangular matrix
272*
273 CALL PCTRMM( 'right', 'lower', 'conjugate transpose',
274 $ 'non-unit', IA+N-IL, JB, CONE, WORK, 1, 1,
275 $ DESCW, A, IL, J, DESCA )
276*
277* Restore the strict upper triangular part of diagonal block.
278*
279 CALL PCLACPY( 'upper', JB, JB-1, WORK, 1, 2, DESCW, A,
280 $ IL, J+1, DESCA )
281*
282 IL = IL - DESCA( MB_ )
283 DESCW( RSRC_ ) = MOD( DESCW( RSRC_ ) + NPROW - 1, NPROW )
284 DESCW( CSRC_ ) = MOD( DESCW( CSRC_ ) + NPCOL - 1, NPCOL )
285*
286 20 CONTINUE
287*
288* Handle first block separately
289*
290 JB = MIN( JN-JA+1, DESCA( NB_ ) )
291*
292* Update the trailing matrix, A = A + L*L'
293*
294 CALL PCHERK( 'lower', 'no transpose', N-JB, JB, ONE, A,
295 $ IA+JB, JA, DESCA, ONE, A, IA+JB, JA+JB, DESCA )
296*
297* Copy current diagonal block of A into workspace
298*
299 CALL PCLACPY( 'all', JB, JB, A, IA, JA, DESCA, WORK, 1, 1,
300 $ DESCW )
301*
302* Zero strict upper triangular part of diagonal block, to make
303* it L1.
304*
305 CALL PCLASET( 'upper', JB, JB-1, ZERO, ZERO, A, IA, JA+1,
306 $ DESCA )
307*
308* Update the column panel L with the triangular matrix
309*
310 CALL PCTRMM( 'right', 'lower', 'conjugate transpose',
311 $ 'non-unit', N, JB, CONE, WORK, 1, 1, DESCW, A,
312 $ IA, JA, DESCA )
313*
314* Restore the strict upper triangular part of diagonal block.
315*
316 CALL PCLACPY( 'upper', JB, JB-1, WORK, 1, 2, DESCW, A, IA,
317 $ JA+1, DESCA )
318*
319 END IF
320*
321 CALL PB_TOPSET( ICTXT, 'broadcast', 'rowwise', ROWBTOP )
322 CALL PB_TOPSET( ICTXT, 'broadcast', 'columnwise', COLBTOP )
323*
324 RETURN
325*
326* End of PCPOTRRV
327*
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function iceil(inum, idenom)
Definition iceil.f:2
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition mpi.f:947
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition mpi.f:1610
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
subroutine pclaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pcblastst.f:7508
subroutine pclacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pclacpy.f:3