OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pdqrt14.f
Go to the documentation of this file.
1 DOUBLE PRECISION FUNCTION pdqrt14( TRANS, M, N, NRHS, A, IA, JA,
2 $ DESCA, X, IX, JX, DESCX, WORK )
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 CHARACTER trans
11 INTEGER ia, ix, ja, jx, m, n, nrhs
12* ..
13* .. Array Arguments ..
14 INTEGER desca( * ), descx( * )
15 DOUBLE PRECISION a( * ), work( * ), x( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PDQRT14 checks whether sub( X ) is in the row space of sub( A ) or
22* sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and
23* sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and
24* X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise. It does so by scaling both
25* sub( X ) and sub( A ) such that their norms are in the range
26* [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of
27* [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of
28* [sub( A ),sub( X )] otherwise, and returning the norm of the trailing
29* triangle, scaled by MAX(M,N,NRHS)*eps.
30*
31* Notes
32* =====
33*
34* Each global data object is described by an associated description
35* vector. This vector stores the information required to establish
36* the mapping between an object element and its corresponding process
37* and memory location.
38*
39* Let A be a generic term for any 2D block cyclicly distributed array.
40* Such a global array has an associated description vector DESCA.
41* In the following comments, the character _ should be read as
42* "of the global array".
43*
44* NOTATION STORED IN EXPLANATION
45* --------------- -------------- --------------------------------------
46* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47* DTYPE_A = 1.
48* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49* the BLACS process grid A is distribu-
50* ted over. The context itself is glo-
51* bal, but the handle (the integer
52* value) may vary.
53* M_A (global) DESCA( M_ ) The number of rows in the global
54* array A.
55* N_A (global) DESCA( N_ ) The number of columns in the global
56* array A.
57* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58* the rows of the array.
59* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60* the columns of the array.
61* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62* row of the array A is distributed.
63* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64* first column of the array A is
65* distributed.
66* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67* array. LLD_A >= MAX(1,LOCr(M_A)).
68*
69* Let K be the number of rows or columns of a distributed matrix,
70* and assume that its process grid has dimension p x q.
71* LOCr( K ) denotes the number of elements of K that a process
72* would receive if K were distributed over the p processes of its
73* process column.
74* Similarly, LOCc( K ) denotes the number of elements of K that a
75* process would receive if K were distributed over the q processes of
76* its process row.
77* The values of LOCr() and LOCc() may be determined via a call to the
78* ScaLAPACK tool function, NUMROC:
79* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81* An upper bound for these quantities may be computed by:
82* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84*
85* Arguments
86* =========
87*
88* TRANS (global input) CHARACTER*1
89* = 'N': No transpose, check for sub( X ) in the row space of
90* sub( A ),
91* = 'T': Transpose, check for sub( X ) in row space of
92* sub( A )'.
93*
94* M (global input) INTEGER
95* The number of rows to be operated on, i.e. the number of rows
96* of the distributed submatrix sub( A ). M >= 0.
97*
98* N (global input) INTEGER
99* The number of columns to be operated on, i.e. the number of
100* columns of the distributed submatrix sub( A ). N >= 0.
101*
102* NRHS (global input) INTEGER
103* The number of right hand sides, i.e., the number of columns
104* of the distributed submatrix sub( X ). NRHS >= 0.
105*
106* A (local input) DOUBLE PRECISION pointer into the local memory
107* to an array of dimension (LLD_A, LOCc(JA+N-1)). This array
108* contains the local pieces of the M-by-N distributed matrix
109* sub( A ).
110*
111* IA (global input) INTEGER
112* The row index in the global array A indicating the first
113* row of sub( A ).
114*
115* JA (global input) INTEGER
116* The column index in the global array A indicating the
117* first column of sub( A ).
118*
119* DESCA (global and local input) INTEGER array of dimension DLEN_.
120* The array descriptor for the distributed matrix A.
121*
122* X (local input) DOUBLE PRECISION pointer into the local
123* memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)).
124* On entry, this array contains the local pieces of the
125* N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N',
126* and the M-by-NRHS distributed submatrix sub( X ) otherwise.
127*
128* IX (global input) INTEGER
129* The row index in the global array X indicating the first
130* row of sub( X ).
131*
132* JX (global input) INTEGER
133* The column index in the global array X indicating the
134* first column of sub( X ).
135*
136* DESCX (global and local input) INTEGER array of dimension DLEN_.
137* The array descriptor for the distributed matrix X.
138*
139* WORK (local workspace) DOUBLE PRECISION array dimension (LWORK)
140* If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and
141* LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where
142*
143* IF TRANS='N', (LQ fact)
144* MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW,
145* NPROW )
146* LTAU = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW,
147* RSRC_A, NPROW )
148* LWF = MB_A * ( MB_A + MNRHSP + NQ0 )
149* ELSE (QR fact)
150* NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL,
151* NPCOL )
152* LTAU = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL,
153* CSRC_A, NPCOL )
154* LWF = NB_A * ( NB_A + MP0 + NNRHSQ )
155* END IF
156*
157* and,
158*
159* IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
160* IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
161* IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
162* MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
163* NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ).
164*
165* INDXG2P and NUMROC are ScaLAPACK tool functions;
166* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
167* the subroutine BLACS_GRIDINFO.
168*
169*
170* =====================================================================
171*
172* .. Parameters ..
173 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
174 $ lld_, mb_, m_, nb_, n_, rsrc_
175 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
176 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
177 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
178 DOUBLE PRECISION one, zero
179 parameter( zero = 0.0d+0, one = 1.0d+0 )
180* ..
181* .. Local Scalars ..
182 LOGICAL tpsd
183 INTEGER iacol, iarow, icoffa, ictxt, IDUM, iia, info,
184 $ iptau, ipw, ipwa, iroffa, iwa, iwx, j, jja,
185 $ jwa, jwx, ldw, lwork, mpwa, mpw, mqw, mycol,
186 $ myrow, npcol, nprow, npw, nqwa, nqw
187 DOUBLE PRECISION amax, anrm, err, xnrm
188* ..
189* .. Local Arrays ..
190 INTEGER descw( dlen_ ), idum1( 1 ), idum2( 1 )
191 DOUBLE PRECISION rwork( 1 )
192* ..
193* .. External Functions ..
194 LOGICAL lsame
195 INTEGER numroc
196 DOUBLE PRECISION pdlange, pdlamch
197 EXTERNAL lsame, numroc, pdlange, pdlamch
198* ..
199* .. External Subroutines ..
200 EXTERNAL blacs_gridinfo, descset, dgamx2d, infog2l,
201 $ pdamax, pdcopy, pdgelqf, pdgeqrf,
203* ..
204* .. Intrinsic Functions ..
205 INTRINSIC abs, dble, max, min, mod
206* ..
207* .. Executable Statements ..
208*
209* Get grid parameters
210*
211 ictxt = desca( ctxt_ )
212 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
213*
214 pdqrt14 = zero
215*
216 ipwa = 1
217 iroffa = mod( ia-1, desca( mb_ ) )
218 icoffa = mod( ja-1, desca( nb_ ) )
219 iwa = iroffa + 1
220 jwa = icoffa + 1
221 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
222 $ jja, iarow, iacol )
223 mpwa = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
224 nqwa = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
225*
226 info = 0
227 IF( lsame( trans, 'n' ) ) THEN
228.LE..OR..LE. IF( N0 NRHS0 )
229 $ RETURN
230 TPSD = .FALSE.
231 MPW = NUMROC( M+NRHS+IROFFA, DESCA( MB_ ), MYROW, IAROW,
232 $ NPROW )
233 NQW = NQWA
234*
235* Assign descriptor DESCW for workspace WORK and pointers to
236* matrices sub( A ) and sub( X ) in workspace
237*
238 IWX = IWA + M
239 JWX = JWA
240 LDW = MAX( 1, MPW )
241 CALL DESCSET( DESCW, M+NRHS+IROFFA, N+ICOFFA, DESCA( MB_ ),
242 $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW )
243*
244 ELSE IF( LSAME( TRANS, 't' ) ) THEN
245.LE..OR..LE. IF( M0 NRHS0 )
246 $ RETURN
247 TPSD = .TRUE.
248 MPW = MPWA
249 NQW = NUMROC( N+NRHS+ICOFFA, DESCA( NB_ ), MYCOL, IACOL,
250 $ NPCOL )
251*
252* Assign descriptor DESCW for workspace WORK and pointers to
253* matrices sub( A ) and sub( X ) in workspace
254*
255 IWX = IWA
256 JWX = JWA + N
257 LDW = MAX( 1, MPW )
258 CALL DESCSET( DESCW, M+IROFFA, N+NRHS+ICOFFA, DESCA( MB_ ),
259 $ DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW )
260 ELSE
261 CALL PXERBLA( ICTXT, 'pdqrt14', -1 )
262 RETURN
263 END IF
264*
265* Copy and scale sub( A )
266*
267 IPTAU = IPWA + MPW*NQW
268 CALL PDLACPY( 'all', M, N, A, IA, JA, DESCA, WORK( IPWA ), IWA,
269 $ JWA, DESCW )
270 RWORK( 1 ) = ZERO
271 ANRM = PDLANGE( 'm', M, N, WORK( IPWA ), IWA, JWA, DESCW, RWORK )
272.NE. IF( ANRMZERO )
273 $ CALL PDLASCL( 'g', ANRM, ONE, M, N, WORK( IPWA ), IWA,
274 $ JWA, DESCW, INFO )
275*
276* Copy sub( X ) or sub( X )' into the right place and scale it
277*
278 IF( TPSD ) THEN
279*
280* Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work
281*
282 DO 10 J = 1, NRHS
283 CALL PDCOPY( M, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), IWX,
284 $ JWX+J-1, DESCW, 1 )
285 10 CONTINUE
286 XNRM = PDLANGE( 'm', M, NRHS, WORK( IPWA ), IWX, JWX, DESCW,
287 $ RWORK )
288.NE. IF( XNRMZERO )
289 $ CALL PDLASCL( 'g', XNRM, ONE, M, NRHS, WORK( IPWA ), IWX,
290 $ JWX, DESCW, INFO )
291*
292* Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1)
293*
294 MQW = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
295 IPW = IPTAU + MIN( MQW, NQW )
296 LWORK = DESCW( NB_ ) * ( MPW + NQW + DESCW( NB_ ) )
297 CALL PDGEQRF( M, N+NRHS, WORK( IPWA ), IWA, JWA, DESCW,
298 $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO )
299*
300* Compute largest entry in upper triangle of
301* work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1)
302*
303 ERR = ZERO
304.LT. IF( NM ) THEN
305 DO 20 J = JWX, JWA+N+NRHS-1
306 CALL PDAMAX( MIN(M-N,J-JWX+1), AMAX, IDUM, WORK( IPWA ),
307 $ IWA+N, J, DESCW, 1 )
308 ERR = MAX( ERR, ABS( AMAX ) )
309 20 CONTINUE
310 END IF
311 CALL DGAMX2D( ICTXT, 'all', ' ', 1, 1, ERR, 1, IDUM1, IDUM2,
312 $ -1, -1, 0 )
313*
314 ELSE
315*
316* Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work
317*
318 DO 30 J = 1, NRHS
319 CALL PDCOPY( N, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ),
320 $ IWX+J-1, JWX, DESCW, DESCW( M_ ) )
321 30 CONTINUE
322*
323 XNRM = PDLANGE( 'm', NRHS, N, WORK( IPWA ), IWX, JWX, DESCW,
324 $ RWORK )
325.NE. IF( XNRMZERO )
326 $ CALL PDLASCL( 'g', XNRM, ONE, NRHS, N, WORK( IPWA ), IWX,
327 $ JWX, DESCW, INFO )
328*
329* Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1)
330*
331 NPW = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW )
332 IPW = IPTAU + MIN( MPW, NPW )
333 LWORK = DESCW( MB_ ) * ( MPW + NQW + DESCW( MB_ ) )
334 CALL PDGELQF( M+NRHS, N, WORK( IPWA ), IWA, JWA, DESCW,
335 $ WORK( IPTAU ), WORK( IPW ), LWORK, INFO )
336*
337* Compute largest entry in lower triangle in
338* work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1)
339*
340 ERR = ZERO
341 DO 40 J = JWA+M, MIN( JWA+N-1, JWA+M+NRHS-1 )
342 CALL PDAMAX( JWA+M+NRHS-J, AMAX, IDUM, WORK( IPWA ),
343 $ IWX+J-JWA-M, J, DESCW, 1 )
344 ERR = MAX( ERR, ABS( AMAX ) )
345 40 CONTINUE
346 CALL DGAMX2D( ICTXT, 'all', ' ', 1, 1, ERR, 1, IDUM1, IDUM2,
347 $ -1, -1, 0 )
348*
349 END IF
350*
351 PDQRT14 = ERR / ( DBLE( MAX( M, N, NRHS ) ) *
352 $ PDLAMCH( ICTXT, 'epsilon' ) )
353*
354 RETURN
355*
356* End of PDQRT14
357*
358 END
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition mpi.f:1610
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1311
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
double precision function pdlamch(ictxt, cmach)
Definition pdblastst.f:6769
subroutine pdgelqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgelqf.f:3
subroutine pdgeqrf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgeqrf.f:3
subroutine pdlacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pdlacpy.f:3
subroutine pdlascl(type, cfrom, cto, m, n, a, ia, ja, desca, info)
Definition pdlascl.f:3
double precision function pdqrt14(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, work)
Definition pdqrt14.f:3