OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pclaschk.f
Go to the documentation of this file.
1 SUBROUTINE pclaschk( SYMM, DIAG, N, NRHS, X, IX, JX, DESCX,
2 $ IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID,
3 $ WORK )
4*
5* -- ScaLAPACK auxiliary routine (version 1.7) --
6* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7* and University of California, Berkeley.
8* May 1, 1997
9*
10* .. Scalar Arguments ..
11 CHARACTER DIAG, SYMM
12 INTEGER IA, IASEED, IBSEED, IX, JA, JX, N, NRHS
13 REAL ANORM, RESID
14* ..
15* .. Array Arguments ..
16 INTEGER DESCA( * ), DESCX( * )
17 COMPLEX WORK( * ), X( * )
18* ..
19*
20* Purpose
21* =======
22*
23* PCLASCHK computes the residual
24* || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N)
25* to check the accuracy of the factorization and solve steps in the
26* LU and Cholesky decompositions, where sub( A ) denotes
27* A(IA:IA+N-1,JA,JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1).
28*
29* Notes
30* =====
31*
32* Each global data object is described by an associated description
33* vector. This vector stores the information required to establish
34* the mapping between an object element and its corresponding process
35* and memory location.
36*
37* Let A be a generic term for any 2D block cyclicly distributed array.
38* Such a global array has an associated description vector DESCA.
39* In the following comments, the character _ should be read as
40* "of the global array".
41*
42* NOTATION STORED IN EXPLANATION
43* --------------- -------------- --------------------------------------
44* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
45* DTYPE_A = 1.
46* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
47* the BLACS process grid A is distribu-
48* ted over. The context itself is glo-
49* bal, but the handle (the integer
50* value) may vary.
51* M_A (global) DESCA( M_ ) The number of rows in the global
52* array A.
53* N_A (global) DESCA( N_ ) The number of columns in the global
54* array A.
55* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
56* the rows of the array.
57* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
58* the columns of the array.
59* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60* row of the array A is distributed.
61* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
62* first column of the array A is
63* distributed.
64* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
65* array. LLD_A >= MAX(1,LOCr(M_A)).
66*
67* Let K be the number of rows or columns of a distributed matrix,
68* and assume that its process grid has dimension p x q.
69* LOCr( K ) denotes the number of elements of K that a process
70* would receive if K were distributed over the p processes of its
71* process column.
72* Similarly, LOCc( K ) denotes the number of elements of K that a
73* process would receive if K were distributed over the q processes of
74* its process row.
75* The values of LOCr() and LOCc() may be determined via a call to the
76* ScaLAPACK tool function, NUMROC:
77* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
78* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
79* An upper bound for these quantities may be computed by:
80* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
81* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
82*
83* Arguments
84* =========
85*
86* SYMM (global input) CHARACTER
87* if SYMM = 'H', sub( A ) is a hermitian distributed matrix,
88* otherwise sub( A ) is a general distributed matrix.
89*
90* DIAG (global input) CHARACTER
91* If DIAG = 'D', sub( A ) is diagonally dominant.
92*
93* N (global input) INTEGER
94* The number of columns to be operated on, i.e. the number of
95* columns of the distributed submatrix sub( A ). N >= 0.
96*
97* NRHS (global input) INTEGER
98* The number of right-hand-sides, i.e the number of columns
99* of the distributed matrix sub( X ). NRHS >= 0.
100*
101* X (local input) COMPLEX pointer into the local memory
102* to an array of dimension (LLD_X,LOCc(JX+NRHS-1). This array
103* contains the local pieces of the answer vector(s) sub( X ) of
104* sub( A ) sub( X ) - B, split up over a column of processes.
105*
106* IX (global input) INTEGER
107* The row index in the global array X indicating the first
108* row of sub( X ).
109*
110* JX (global input) INTEGER
111* The column index in the global array X indicating the
112* first column of sub( X ).
113*
114* DESCX (global and local input) INTEGER array of dimension DLEN_.
115* The array descriptor for the distributed matrix X.
116*
117* IASEED (global input) INTEGER
118* The seed number to generate the original matrix Ao.
119*
120* IA (global input) INTEGER
121* The row index in the global array A indicating the first
122* row of sub( A ).
123*
124* JA (global input) INTEGER
125* The column index in the global array A indicating the
126* first column of sub( A ).
127*
128* DESCA (global and local input) INTEGER array of dimension DLEN_.
129* The array descriptor for the distributed matrix A.
130*
131* IBSEED (global input) INTEGER
132* The seed number to generate the original matrix B.
133*
134* ANORM (global input) REAL
135* The 1-norm or infinity norm of the distributed matrix
136* sub( A ).
137*
138* RESID (global output) REAL
139* The residual error:
140* ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N).
141*
142* WORK (local workspace) COMPLEX array, dimension (LWORK)
143* LWORK >= MAX(1,Np)*NB_X + Nq*NB_X + MAX( MAX(NQ*MB_A,2*NB_X),
144* NB_X * NUMROC( NUMROC(N,MB_X,0,0,NPCOL), MB_X, 0, 0, LCMQ ) )
145*
146* =====================================================================
147*
148* .. Parameters ..
149 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
150 $ LLD_, MB_, M_, NB_, N_, RSRC_
151 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
152 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
153 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
154 COMPLEX ZERO, ONE
155 PARAMETER ( ONE = ( 1.0e+0, 0.0e+0 ),
156 $ zero = ( 0.0e+0, 0.0e+0 ) )
157* ..
158* .. Local Scalars ..
159 INTEGER IACOL, IAROW, IB, ICOFF, ICTXT, ICURCOL, IDUMM,
160 $ II, IIA, IIX, IOFFX, IPA, IPB, IPW, IPX, IROFF,
161 $ ixcol, ixrow, j, jbrhs, jj, jja, jjx, ldx,
162 $ mycol, myrow, np, npcol, nprow, nq
163 REAL DIVISOR, EPS, RESID1
164 COMPLEX BETA
165* ..
166* .. External Subroutines ..
167 EXTERNAL blacs_gridinfo, cgamx2d, cgemm, cgsum2d,
169 $ sgebs2d, sgerv2d, sgesd2d
170* ..
171* .. External Functions ..
172 INTEGER ICAMAX, NUMROC
173 REAL PSLAMCH
174 EXTERNAL icamax, numroc, pslamch
175* ..
176* .. Intrinsic Functions ..
177 INTRINSIC abs, max, min, mod, real
178* ..
179* .. Executable Statements ..
180*
181* Get needed initial parameters
182*
183 ictxt = desca( ctxt_ )
184 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
185*
186 eps = pslamch( ictxt, 'eps' )
187 resid = 0.0e+0
188 divisor = anorm * eps * real( n )
189*
190 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
191 $ iarow, iacol )
192 CALL infog2l( ix, jx, descx, nprow, npcol, myrow, mycol, iix, jjx,
193 $ ixrow, ixcol )
194 iroff = mod( ia-1, desca( mb_ ) )
195 icoff = mod( ja-1, desca( nb_ ) )
196 np = numroc( n+iroff, desca( mb_ ), myrow, iarow, nprow )
197 nq = numroc( n+icoff, desca( nb_ ), mycol, iacol, npcol )
198*
199 ldx = max( 1, np )
200 ipb = 1
201 ipx = ipb + np * descx( nb_ )
202 ipa = ipx + nq * descx( nb_ )
203*
204 IF( myrow.EQ.iarow )
205 $ np = np - iroff
206 IF( mycol.EQ.iacol )
207 $ nq = nq - icoff
208*
209 icurcol = ixcol
210*
211* Loop over the rhs
212*
213 DO 40 j = 1, nrhs, descx( nb_ )
214 jbrhs = min( descx( nb_ ), nrhs-j+1 )
215*
216* Transpose x from ICURCOL to all rows
217*
218 ioffx = iix + ( jjx - 1 ) * descx( lld_ )
219 CALL pbctran( ictxt, 'Column', 'transpose', N, JBRHS,
220 $ DESCX( MB_ ), X( IOFFX ), DESCX( LLD_ ), ZERO,
221 $ WORK( IPX ), JBRHS, IXROW, ICURCOL, -1, IACOL,
222 $ WORK( IPA ) )
223*
224* Regenerate B in IXCOL
225*
226.EQ. IF( MYCOLICURCOL ) THEN
227 CALL PCMATGEN( ICTXT, 'n', 'n', DESCX( M_ ), DESCX( N_ ),
228 $ DESCX( MB_ ), DESCX( NB_ ), WORK( IPB ), LDX,
229 $ IXROW, IXCOL, IBSEED, IIX-1, NP, JJX-1,
230 $ JBRHS, MYROW, MYCOL, NPROW, NPCOL )
231 BETA = ONE
232 ELSE
233 BETA = ZERO
234 END IF
235*
236.GT. IF( NQ0 ) THEN
237 DO 10 II = IIA, IIA+NP-1, DESCA( MB_ )
238 IB = MIN( DESCA( MB_ ), IIA+NP-II )
239*
240* Regenerate ib rows of the matrix A(IA:IA+N-1,JA:JA+N-1).
241*
242 CALL PCMATGEN( ICTXT, SYMM, DIAG, DESCA( M_ ),
243 $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ),
244 $ WORK( IPA ), IB, DESCA( RSRC_ ),
245 $ DESCA( CSRC_ ), IASEED, II-1, IB,
246 $ JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL )
247*
248* Compute B <= B - A * X.
249*
250 CALL CGEMM( 'no transpose', 'transpose', IB, JBRHS, NQ,
251 $ -ONE, WORK( IPA ), IB, WORK( IPX ), JBRHS,
252 $ BETA, WORK( IPB+II-IIA ), LDX )
253*
254 10 CONTINUE
255*
256.NE. ELSE IF( MYCOLICURCOL ) THEN
257*
258 CALL CLASET( 'all', NP, JBRHS, ZERO, ZERO, WORK( IPB ),
259 $ LDX )
260*
261 END IF
262*
263* Add B rowwise to ICURCOL
264*
265 CALL CGSUM2D( ICTXT, 'row', ' ', NP, JBRHS, WORK( IPB ), LDX,
266 $ MYROW, ICURCOL )
267*
268.EQ. IF( MYCOLICURCOL ) THEN
269*
270* Figure || A * X - B || & || X ||
271*
272 IPW = IPA + JBRHS
273 DO 20 JJ = 0, JBRHS - 1
274.GT. IF( NP0 ) THEN
275 II = ICAMAX( NP, WORK( IPB+JJ*LDX ), 1 )
276 WORK( IPA+JJ ) = ABS( WORK( IPB+II-1+JJ*LDX ) )
277 WORK( IPW+JJ ) = ABS( X( IOFFX + ICAMAX( NP,
278 $ X( IOFFX + JJ*DESCX( LLD_ ) ), 1 )-1+JJ*
279 $ DESCX( LLD_ ) ) )
280 ELSE
281 WORK( IPA+JJ ) = ZERO
282 WORK( IPW+JJ ) = ZERO
283 END IF
284 20 CONTINUE
285*
286* After CGAMX2D computation,
287* WORK(IPB) has the maximum of || Ax - b ||, and
288* WORK(IPX) has the maximum of || X ||.
289*
290 CALL CGAMX2D( ICTXT, 'column', ' ', 1, 2*JBRHS,
291 $ WORK( IPA ), 1, IDUMM, IDUMM, -1, 0, ICURCOL )
292*
293* Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N)
294*
295.EQ. IF( MYROW0 ) THEN
296 DO 30 JJ = 0, JBRHS - 1
297 RESID1 = REAL( WORK( IPA+JJ ) ) /
298 $ ( REAL( WORK( IPW+JJ ) )*DIVISOR )
299.LT. IF( RESIDRESID1 )
300 $ RESID = RESID1
301 30 CONTINUE
302.NE. IF( MYCOL0 )
303 $ CALL SGESD2D( ICTXT, 1, 1, RESID, 1, 0, 0 )
304 END IF
305*
306.EQ..AND..EQ. ELSE IF( MYROW0 MYCOL0 ) THEN
307*
308 CALL SGERV2D( ICTXT, 1, 1, RESID1, 1, 0, ICURCOL )
309.LT. IF( RESIDRESID1 )
310 $ RESID = RESID1
311*
312 END IF
313*
314.EQ. IF( MYCOLICURCOL )
315 $ JJX = JJX + JBRHS
316 ICURCOL = MOD( ICURCOL+1, NPCOL )
317*
318 40 CONTINUE
319*
320.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
321 CALL SGEBS2D( ICTXT, 'all', ' ', 1, 1, resid, 1 )
322 ELSE
323 CALL sgebr2d( ictxt, 'All', ' ', 1, 1, resid, 1, 0, 0 )
324 END IF
325*
326 RETURN
327*
328* End of PCLASCHK
329*
330 END
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
subroutine claset(uplo, m, n, alpha, beta, a, lda)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition claset.f:106
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:187
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine sgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1072
subroutine sgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1113
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
subroutine pbctran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
Definition pbctran.f:3
subroutine pclaschk(symm, diag, n, nrhs, x, ix, jx, descx, iaseed, ia, ja, desca, ibseed, anorm, resid, work)
Definition pclaschk.f:4