OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pbstrnv.f
Go to the documentation of this file.
1 SUBROUTINE pbstrnv( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX,
2 $ BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL,
3 $ WORK )
4*
5* -- PB-BLAS routine (version 2.1) --
6* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
7* April 28, 1996
8*
9* Jaeyoung Choi, Oak Ridge National Laboratory
10* Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
11* David Walker, Oak Ridge National Laboratory
12*
13* .. Scalar Arguments ..
14 CHARACTER*1 TRANS, XDIST
15 INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
16 $ iyrow, n, nb, nz
17 REAL BETA
18* ..
19* .. Array Arguments ..
20 REAL WORK( * ), X( * ), Y( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PBSTRNV transposes a column vector to row vector, or a row vector to
27* column vector by reallocating data distribution.
28*
29* Y := X'
30*
31* where X and Y are N vectors.
32*
33* Parameters
34* ==========
35*
36* ICONTXT (input) INTEGER
37* ICONTXT is the BLACS mechanism for partitioning communication
38* space. A defining property of a context is that a message in
39* a context cannot be sent or received in another context. The
40* BLACS context includes the definition of a grid, and each
41* process' coordinates in it.
42*
43* XDIST (input) CHARACTER*1
44* XDIST specifies whether X is a column vector or a row vector,
45*
46* XDIST = 'C', X is a column vector (distributed columnwise)
47* XDIST = 'R', X is a row vector (distributed rowwise)
48*
49* TRANS (input) CHARACTER*1
50* TRANS specifies whether the transposed format is transpose
51* or conjugate transpose. If the vectors X and Y are real,
52* the argument is ignored.
53*
54* TRANS = 'T', transpose
55* TRANS = 'C', conjugate transpose
56*
57* N (input) INTEGER
58* N specifies the (global) number of the vector X and the
59* vector Y. N >= 0.
60*
61* NB (input) INTEGER
62* NB specifies the block size of vectors X and Y. NB >= 0.
63*
64* NZ (input) INTEGER
65* NZ is the column offset to specify the column distance from
66* the beginning of the block to the first element of the
67* vector X, and the row offset to the first element of the
68* vector Y if XDIST = 'C'.
69* Otherwise, it is row offset to specify the row distance
70* from the beginning of the block to the first element of the
71* vector X, and the column offset to the first element of the
72* vector Y. 0 < NZ <= NB.
73*
74* X (input) REAL array of dimension at least
75* ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
76* ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
77* The incremented array X must contain the vector X.
78*
79* INCX (input) INTEGER
80* INCX specifies the increment for the elements of X.
81* INCX <> 0.
82*
83* BETA (input) REAL
84* BETA specifies scaler beta.
85*
86* Y (input/output) REAL array of dimension at least
87* ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
88* ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or
89* The incremented array Y must contain the vector Y.
90* Y will not be referenced if beta is zero.
91*
92* INCY (input) INTEGER
93* INCY specifies the increment for the elements of Y.
94* INCY <> 0.
95*
96* IXROW (input) INTEGER
97* IXROW specifies a row of the process template, which holds
98* the first element of the vector X. If X is a row vector and
99* all rows of processes have a copy of X, then set IXROW = -1.
100*
101* IXCOL (input) INTEGER
102* IXCOL specifies a column of the process template,
103* which holds the first element of the vector X. If X is a
104* column block and all columns of processes have a copy of X,
105* then set IXCOL = -1.
106*
107* IYROW (input) INTEGER
108* IYROW specifies the current row process which holds the
109* first element of the vector Y, which is transposed of X.
110* If X is a column vector and the transposed row vector Y is
111* distributed all rows of processes, set IYROW = -1.
112*
113* IYCOL (input) INTEGER
114* IYCOL specifies the current column process which holds
115* the first element of the vector Y, which is transposed of Y.
116* If X is a row block and the transposed column vector Y is
117* distributed all columns of processes, set IYCOL = -1.
118*
119* WORK (workspace) REAL array of dimension Size(WORK).
120* It needs extra working space of x**T or x**H.
121*
122* Parameters Details
123* ==================
124*
125* Nx It is a local portion of N owned by a process, where x is
126* replaced by either p (=NPROW) or q (=NPCOL)). The value is
127* determined by N, NB, NZ, x, and MI, where NB is a block size,
128* NZ is a offset from the beginning of the block, and MI is a
129* row or column position in a process template. Nx is equal
130* to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
131*
132* Communication Scheme
133* ====================
134*
135* The communication scheme of the routine is set to '1-tree', which is
136* fan-out. (For details, see BLACS user's guide.)
137*
138* Memory Requirement of WORK
139* ==========================
140*
141* NN = N + NZ
142* Npb = CEIL( NN, NB*NPROW )
143* Nqb = CEIL( NN, NB*NPCOL )
144* LCMP = LCM / NPROW
145* LCMQ = LCM / NPCOL
146*
147* (1) XDIST = 'C'
148* (a) IXCOL != -1
149* Size(WORK) = CEIL(Nqb,LCMQ)*NB
150* (b) IXCOL = -1
151* Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
152*
153* (2) XDIST = 'R'
154* (a) IXROW != -1
155* Size(WORK) = CEIL(Npb,LCMP)*NB
156* (b) IXROW = -1
157* Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
158*
159* Notes
160* -----
161* More precise space can be computed as
162*
163* CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
164* CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
165*
166* =====================================================================
167*
168* .. Parameters ..
169 REAL ONE, ZERO
170 PARAMETER ( ONE = 1.0e+0, zero = 0.0e+0 )
171* ..
172* .. Local Scalars ..
173 LOGICAL COLFORM, ROWFORM
174 INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
175 $ lcm, lcmp, lcmq, mccol, mcrow, mrcol, mrrow,
176 $ mycol, myrow, nn, np, np0, np1, npcol, nprow,
177 $ nq, nq0, nq1
178 REAL TBETA
179* ..
180* .. External Functions ..
181 LOGICAL LSAME
182 INTEGER ILCM, ICEIL, NUMROC
183 EXTERNAL lsame, ilcm, iceil, numroc
184* ..
185* .. External Subroutines ..
188 $ sgerv2d, sgesd2d
189* ..
190* .. Intrinsic Functions ..
191 INTRINSIC max, min, mod
192* ..
193* .. Executable Statements ..
194*
195* Quick return if possible.
196*
197 IF( n.EQ.0 ) RETURN
198*
199 CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
200*
201 colform = lsame( xdist, 'C' )
202 rowform = lsame( xdist, 'R' )
203*
204* Test the input parameters.
205*
206 info = 0
207 IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
208 info = 2
209 ELSE IF( n .LT.0 ) THEN
210 info = 4
211 ELSE IF( nb .LT.1 ) THEN
212 info = 5
213 ELSE IF( nz .LT.0 .OR. nz.GE.nb ) THEN
214 info = 6
215 ELSE IF( incx.EQ.0 ) THEN
216 info = 8
217 ELSE IF( incy.EQ.0 ) THEN
218 info = 11
219 ELSE IF( ixrow.LT.-1 .OR. ixrow.GE.nprow .OR.
220 $ ( ixrow.EQ.-1 .AND. colform ) ) THEN
221 info = 12
222 ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol .OR.
223 $ ( ixcol.EQ.-1 .AND. rowform ) ) THEN
224 info = 13
225 ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow .OR.
226 $ ( iyrow.EQ.-1 .AND. rowform ) ) THEN
227 info = 14
228 ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol .OR.
229 $ ( iycol.EQ.-1 .AND. colform ) ) THEN
230 info = 15
231 END IF
232*
233 10 CONTINUE
234 IF( info.NE.0 ) THEN
235 CALL pxerbla( icontxt, 'pbstrnv ', INFO )
236 RETURN
237 END IF
238*
239* Start the operations.
240*
241* LCM : the least common multiple of NPROW and NPCOL
242*
243 LCM = ILCM( NPROW, NPCOL )
244 LCMP = LCM / NPROW
245 LCMQ = LCM / NPCOL
246 IGD = NPCOL / LCMP
247 NN = N + NZ
248*
249* When x is a column vector
250*
251 IF( COLFORM ) THEN
252*
253* Form y <== x' ( x is a column vector )
254*
255* ||
256* ||
257* _____________ ||
258* -----(y)----- <== (x)
259* ||
260* ||
261* ||
262*
263.LT..OR..GE. IF( IXROW0 IXROWNPROW ) THEN
264 INFO = 12
265.LT..OR..GE. ELSE IF( IXCOL-1 IXCOLNPCOL ) THEN
266 INFO = 13
267.LT..OR..GE. ELSE IF( IYROW-1 IYROWNPROW ) THEN
268 INFO = 14
269.LT..OR..GE. ELSE IF( IYCOL0 IYCOLNPCOL ) THEN
270 INFO = 15
271 END IF
272.NE. IF( INFO0 ) GO TO 10
273*
274* MRROW : row relative position in template from IXROW
275* MRCOL : column relative position in template from IYCOL
276*
277 MRROW = MOD( NPROW+MYROW-IXROW, NPROW )
278 MRCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL )
279 JYROW = IYROW
280.EQ. IF( IYROW-1 ) JYROW = IXROW
281*
282 NP = NUMROC( NN, NB, MYROW, IXROW, NPROW )
283.EQ. IF( MRROW0 ) NP = NP - NZ
284 NQ = NUMROC( NN, NB, MYCOL, IYCOL, NPCOL )
285.EQ. IF( MRCOL0 ) NQ = NQ - NZ
286 NQ0 = NUMROC( NUMROC(NN, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ )
287*
288* When a column process of IXCOL has a column block A,
289*
290.GE. IF( IXCOL 0 ) THEN
291 TBETA = ZERO
292.EQ. IF( MYROWJYROW ) TBETA = BETA
293 KZ = NZ
294*
295 DO 20 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1
296 MCROW = MOD( MOD(I, NPROW) + IXROW, NPROW )
297 MCCOL = MOD( MOD(I, NPCOL) + IYCOL, NPCOL )
298.EQ. IF( LCMQ1 ) NQ0 = NUMROC( NN, NB, I, 0, NPCOL )
299 JDEX = (I/NPCOL) * NB
300.EQ. IF( MRCOL0 ) JDEX = MAX(0, JDEX-NZ)
301*
302* A source node copies the blocks to WORK, and send it
303*
304.EQ..AND..EQ. IF( MYROWMCROW MYCOLIXCOL ) THEN
305*
306* The source node is a destination node
307*
308 IDEX = (I/NPROW) * NB
309.EQ. IF( MRROW0 ) IDEX = MAX( 0, IDEX-NZ )
310.EQ..AND..EQ. IF( MYROWJYROW MYCOLMCCOL ) THEN
311 CALL PBSTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ,
312 $ X(IDEX*INCX+1), INCX, TBETA,
313 $ Y(JDEX*INCY+1), INCY, LCMP, LCMQ )
314*
315* The source node sends blocks to a destination node
316*
317 ELSE
318 CALL PBSTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ,
319 $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
320 $ LCMP, 1 )
321 CALL SGESD2D( ICONTXT, 1, NQ0-KZ, WORK, 1,
322 $ JYROW, MCCOL )
323 END IF
324*
325* A destination node receives the copied vector
326*
327.EQ..AND..EQ. ELSE IF( MYROWJYROW MYCOLMCCOL ) THEN
328.EQ..AND..EQ. IF( LCMQ1 TBETAZERO ) THEN
329 CALL SGERV2D( ICONTXT, 1, NQ0-KZ, Y, INCY,
330 $ MCROW, IXCOL )
331 ELSE
332 CALL SGERV2D( ICONTXT, 1, NQ0-KZ, WORK, 1,
333 $ MCROW, IXCOL )
334 CALL PBSTR2A1( ICONTXT, NQ-JDEX, NB, KZ, WORK, 1, TBETA,
335 $ Y(JDEX*INCY+1), INCY, LCMQ*NB )
336 END IF
337 END IF
338 KZ = 0
339 20 CONTINUE
340*
341* Broadcast a row block of WORK in each column of template
342*
343.EQ. IF( IYROW-1 ) THEN
344.EQ. IF( MYROWJYROW ) THEN
345 CALL SGEBS2D( ICONTXT, 'col', '1-tree', 1, NQ, Y, INCY )
346 ELSE
347 CALL SGEBR2D( ICONTXT, 'col', '1-tree', 1, NQ, Y, INCY,
348 $ JYROW, MYCOL )
349 END IF
350 END IF
351*
352* When all column procesors have a copy of the column block A,
353*
354 ELSE
355.EQ. IF( LCMQ1 ) NQ0 = NQ
356*
357* Processors, which have diagonal blocks of X, copy them to
358* WORK array in transposed form
359*
360 KZ = 0
361.EQ. IF( MRROW0 ) KZ = NZ
362 JZ = 0
363.EQ..AND..EQ. IF( MRROW0 MYCOLIYCOL ) JZ = NZ
364*
365 DO 30 I = 0, LCMP - 1
366.EQ. IF( MRCOLMOD(NPROW*I+MRROW, NPCOL) ) THEN
367 IDEX = MAX( 0, I*NB-KZ )
368.EQ..AND..EQ..OR..EQ. IF( LCMQ1 (IYROW-1IYROWMYROW) ) THEN
369 CALL PBSTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ,
370 $ X(IDEX*INCX+1), INCX, BETA, Y, INCY,
371 $ LCMP, 1 )
372 ELSE
373 CALL PBSTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ,
374 $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
375 $ LCMP, 1 )
376 END IF
377 END IF
378 30 CONTINUE
379*
380* Get diagonal blocks of A for each column of the template
381*
382 MCROW = MOD( MOD(MRCOL, NPROW) + IXROW, NPROW )
383.GT. IF( LCMQ1 ) THEN
384 MCCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL )
385 CALL PBSTRGET( ICONTXT, 'row', 1, NQ0, ICEIL( NN, NB ),
386 $ WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL,
387 $ NPROW, NPCOL )
388 END IF
389*
390* Broadcast a row block of WORK in every row of template
391*
392.EQ. IF( IYROW-1 ) THEN
393.EQ. IF( MYROWMCROW ) THEN
394.GT. IF( LCMQ1 ) THEN
395 KZ = 0
396.EQ. IF( MYCOLIYCOL ) KZ = NZ
397 CALL PBSTRST1( ICONTXT, 'row', NQ, NB, KZ, WORK, 1,
398 $ BETA, Y, INCY, LCMP, LCMQ, NQ0 )
399 END IF
400 CALL SGEBS2D( ICONTXT, 'col', '1-tree', 1, NQ, Y, INCY )
401 ELSE
402 CALL SGEBR2D( ICONTXT, 'col', '1-tree', 1, NQ, Y, INCY,
403 $ MCROW, MYCOL )
404 END IF
405*
406* Send a row block of WORK to the destination row
407*
408 ELSE
409.EQ. IF( LCMQ1 ) THEN
410.EQ. IF( MYROWMCROW ) THEN
411.NE. IF( MYROWIYROW )
412 $ CALL SGESD2D( ICONTXT, 1, NQ0, WORK, 1, IYROW, MYCOL )
413.EQ. ELSE IF( MYROWIYROW ) THEN
414.EQ. IF( BETAZERO ) THEN
415 CALL SGERV2D( ICONTXT, 1, NQ0, Y, INCY, MCROW, MYCOL )
416 ELSE
417 CALL SGERV2D( ICONTXT, 1, NQ0, WORK, 1, MCROW, MYCOL )
418 CALL PBSVECADD( ICONTXT, 'g', NQ0, ONE, WORK, 1,
419 $ BETA, Y, INCY )
420 END IF
421 END IF
422*
423 ELSE
424 NQ1 = NQ0 * MIN( LCMQ, MAX( 0, ICEIL(NN,NB)-MCCOL ) )
425.EQ. IF( MYROWMCROW ) THEN
426.NE. IF( MYROWIYROW )
427 $ CALL SGESD2D( ICONTXT, 1, NQ1, WORK, 1, IYROW, MYCOL )
428.EQ. ELSE IF( MYROWIYROW ) THEN
429 CALL SGERV2D( ICONTXT, 1, NQ1, WORK, 1, MCROW, MYCOL )
430 END IF
431*
432.EQ. IF( MYROWIYROW ) THEN
433 KZ = 0
434.EQ. IF( MYCOLIYCOL ) KZ = NZ
435 CALL PBSTRST1( ICONTXT, 'row', NQ, NB, KZ, WORK, 1,
436 $ BETA, Y, INCY, LCMP, LCMQ, NQ0 )
437 END IF
438 END IF
439 END IF
440 END IF
441*
442* When x is a row vector
443*
444 ELSE
445*
446* Form y <== x' ( x is a row block )
447*
448* ||
449* ||
450* || _____________
451* (y) <== -----(x)-----
452* ||
453* ||
454* ||
455*
456.LT..OR..GE. IF( IXROW-1 IXROWNPROW ) THEN
457 INFO = 12
458.LT..OR..GE. ELSE IF( IXCOL0 IXCOLNPCOL ) THEN
459 INFO = 13
460.LT..OR..GE. ELSE IF( IYROW0 IYROWNPROW ) THEN
461 INFO = 14
462.LT..OR..GE. ELSE IF( IYCOL-1 IYCOLNPCOL ) THEN
463 INFO = 15
464 END IF
465.NE. IF( INFO0 ) GO TO 10
466*
467* MRROW : row relative position in template from IYROW
468* MRCOL : column relative position in template from IXCOL
469*
470 MRROW = MOD( NPROW+MYROW-IYROW, NPROW )
471 MRCOL = MOD( NPCOL+MYCOL-IXCOL, NPCOL )
472 JYCOL = IYCOL
473.EQ. IF( IYCOL-1 ) JYCOL = IXCOL
474*
475 NP = NUMROC( NN, NB, MYROW, IYROW, NPROW )
476.EQ. IF( MRROW0 ) NP = NP - NZ
477 NQ = NUMROC( NN, NB, MYCOL, IXCOL, NPCOL )
478.EQ. IF( MRCOL0 ) NQ = NQ - NZ
479 NP0 = NUMROC( NUMROC(NN, NB, 0, 0, NPROW), NB, 0, 0, LCMP )
480*
481* When a row process of IXROW has a row block A,
482*
483.GE. IF( IXROW 0 ) THEN
484 TBETA = ZERO
485.EQ. IF( MYCOLJYCOL ) TBETA = BETA
486 KZ = NZ
487*
488 DO 40 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1
489 MCROW = MOD( MOD(I, NPROW) + IYROW, NPROW )
490 MCCOL = MOD( MOD(I, NPCOL) + IXCOL, NPCOL )
491.EQ. IF( LCMP1 ) NP0 = NUMROC( NN, NB, I, 0, NPROW )
492 JDEX = (I/NPROW) * NB
493.EQ. IF( MRROW0 ) JDEX = MAX(0, JDEX-NZ)
494*
495* A source node copies the blocks to WORK, and send it
496*
497.EQ..AND..EQ. IF( MYROWIXROW MYCOLMCCOL ) THEN
498*
499* The source node is a destination node
500*
501 IDEX = (I/NPCOL) * NB
502.EQ. IF( MRCOL0 ) IDEX = MAX( 0, IDEX-NZ )
503.EQ..AND..EQ. IF( MYROWMCROW MYCOLJYCOL ) THEN
504 CALL PBSTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ,
505 $ X(IDEX*INCX+1), INCX, TBETA,
506 $ Y(JDEX*INCY+1), INCY, LCMQ, LCMP )
507*
508* The source node sends blocks to a destination node
509*
510 ELSE
511 CALL PBSTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ,
512 $ X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
513 $ LCMQ, 1 )
514 CALL SGESD2D( ICONTXT, 1, NP0-KZ, WORK, 1,
515 $ MCROW, JYCOL )
516 END IF
517*
518* A destination node receives the copied blocks
519*
520.EQ..AND..EQ. ELSE IF( MYROWMCROW MYCOLJYCOL ) THEN
521.EQ..AND..EQ. IF( LCMP1 TBETAZERO ) THEN
522 CALL SGERV2D( ICONTXT, 1, NP0-KZ, Y, INCY,
523 $ IXROW, MCCOL )
524 ELSE
525 CALL SGERV2D( ICONTXT, 1, NP0-KZ, WORK, 1,
526 $ IXROW, MCCOL )
527 CALL PBSTR2A1( ICONTXT, NP-JDEX, NB, KZ, WORK, 1, TBETA,
528 $ Y(JDEX*INCY+1), INCY, LCMP*NB )
529 END IF
530 END IF
531 KZ = 0
532 40 CONTINUE
533*
534* Broadcast a column vector Y in each row of template
535*
536.EQ. IF( IYCOL-1 ) THEN
537.EQ. IF( MYCOLJYCOL ) THEN
538 CALL SGEBS2D( ICONTXT, 'row', '1-tree', 1, np, y, incy )
539 ELSE
540 CALL sgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
541 $ myrow, jycol )
542 END IF
543 END IF
544*
545* When all row procesors have a copy of the row block A,
546*
547 ELSE
548 IF( lcmp.EQ.1 ) np0 = np
549*
550* Processors, which have diagonal blocks of A, copy them to
551* WORK array in transposed form
552*
553 kz = 0
554 IF( mrcol.EQ.0 ) kz = nz
555 jz = 0
556 IF( mrcol.EQ.0 .AND. myrow.EQ.iyrow ) jz = nz
557*
558 DO 50 i = 0, lcmq-1
559 IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
560 idex = max( 0, i*nb-kz )
561 IF( lcmp.EQ.1 .AND. (iycol.EQ.-1.OR.iycol.EQ.mycol) ) THEN
562 CALL pbstr2b1( icontxt, trans, nq-idex, nb, jz,
563 $ x(idex*incx+1), incx, beta, y, incy,
564 $ lcmq, 1 )
565 ELSE
566 CALL pbstr2b1( icontxt, trans, nq-idex, nb, jz,
567 $ x(idex*incx+1), incx, zero, work, 1,
568 $ lcmq, 1 )
569 END IF
570 END IF
571 50 CONTINUE
572*
573* Get diagonal blocks of A for each row of the template
574*
575 mccol = mod( mod(mrrow, npcol) + ixcol, npcol )
576 IF( lcmp.GT.1 ) THEN
577 mcrow = mod( nprow+myrow-iyrow, nprow )
578 CALL pbstrget( icontxt, 'Col', 1, np0, iceil( nn, nb ),
579 $ work, 1, mcrow, mccol, igd, myrow, mycol,
580 $ nprow, npcol )
581 END IF
582*
583* Broadcast a column block of WORK in every column of template
584*
585 IF( iycol.EQ.-1 ) THEN
586 IF( mycol.EQ.mccol ) THEN
587 IF( lcmp.GT.1 ) THEN
588 kz = 0
589 IF( myrow.EQ.iyrow ) kz = nz
590 CALL pbstrst1( icontxt, 'Col', np, nb, kz, work, 1,
591 $ beta, y, incy, lcmp, lcmq, np0 )
592 END IF
593 CALL sgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
594 ELSE
595 CALL sgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
596 $ myrow, mccol )
597 END IF
598*
599* Send a column block of WORK to the destination column
600*
601 ELSE
602 IF( lcmp.EQ.1 ) THEN
603 IF( mycol.EQ.mccol ) THEN
604 IF( mycol.NE.iycol )
605 $ CALL sgesd2d( icontxt, 1, np, work, 1, myrow, iycol )
606 ELSE IF( mycol.EQ.iycol ) THEN
607 IF( beta.EQ.zero ) THEN
608 CALL sgerv2d( icontxt, 1, np, y, incy, myrow, mccol )
609 ELSE
610 CALL sgerv2d( icontxt, 1, np, work, 1, myrow, mccol )
611 CALL pbsvecadd( icontxt, 'G', np, one, work, 1, beta,
612 $ y, incy )
613 END IF
614 END IF
615*
616 ELSE
617 np1 = np0 * min( lcmp, max( 0, iceil(nn,nb)-mcrow ) )
618 IF( mycol.EQ.mccol ) THEN
619 IF( mycol.NE.iycol )
620 $ CALL sgesd2d( icontxt, 1, np1, work, 1, myrow, iycol )
621 ELSE IF( mycol.EQ.iycol ) THEN
622 CALL sgerv2d( icontxt, 1, np1, work, 1, myrow, mccol )
623 END IF
624*
625 IF( mycol.EQ.iycol ) THEN
626 kz = 0
627 IF( myrow.EQ.iyrow ) kz = nz
628 CALL pbstrst1( icontxt, 'Col', np, nb, kz, work, 1,
629 $ beta, y, incy, lcmp, lcmq, np0 )
630 END IF
631 END IF
632 END IF
633 END IF
634 END IF
635*
636 RETURN
637*
638* End of PBSTRNV
639*
640 END
641*
642*=======================================================================
643* SUBROUTINE PBSTR2A1
644*=======================================================================
645*
646 SUBROUTINE pbstr2a1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY,
647 $ INTV )
648*
649* -- PB-BLAS routine (version 2.1) --
650* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
651* April 28, 1996
652*
653* .. Scalar Arguments ..
654 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV
655 REAL BETA
656* ..
657* .. Array Arguments ..
658 REAL X( * ), Y( * )
659* ..
660*
661* Purpose
662* =======
663*
664* y <== x
665* y is a scattered vector, copied from a condensed vector x.
666*
667* ..
668* .. Intrinsic Functions ..
669 INTRINSIC min
670* ..
671* .. External Functions ..
672 INTEGER ICEIL
673 EXTERNAL ICEIL
674* ..
675* .. External Subroutines ..
676 EXTERNAL pbsvecadd
677* ..
678* .. Parameters ..
679 REAL ONE
680 PARAMETER ( ONE = 1.0e+0 )
681* ..
682* .. Local Variables ..
683 INTEGER IX, IY, JZ, K, ITER
684*
685 IX = 0
686 iy = 0
687 jz = nz
688 iter = iceil( n+nz, intv )
689*
690 IF( iter.GT.1 ) THEN
691 CALL pbsvecadd( icontxt, 'G', nb-jz, one, x(ix*incx+1), incx,
692 $ beta, y(iy*incy+1), incy )
693 ix = ix + nb - jz
694 iy = iy + intv - jz
695 jz = 0
696*
697 DO 10 k = 2, iter-1
698 CALL pbsvecadd( icontxt, 'G', nb, one, x(ix*incx+1), incx,
699 $ beta, y(iy*incy+1), incy )
700 ix = ix + nb
701 iy = iy + intv
702 10 CONTINUE
703 END IF
704*
705 CALL pbsvecadd( icontxt, 'G', min( n-iy, nb-jz ), one,
706 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
707*
708 RETURN
709*
710* End of PBSTR2A1
711*
712 END
713*
714*=======================================================================
715* SUBROUTINE PBSTR2B1
716*=======================================================================
717*
718 SUBROUTINE pbstr2b1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y,
719 $ INCY, JINX, JINY )
720*
721* -- PB-BLAS routine (version 2.1) --
722* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
723* April 28, 1996
724*
725* .. Scalar Arguments ..
726 CHARACTER*1 TRANS
727 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
728 REAL BETA
729* ..
730* .. Array Arguments ..
731 REAL X( * ), Y( * )
732* ..
733*
734* Purpose
735* =======
736*
737* y <== x + beta * y
738* y is a condensed vector, copied from a scattered vector x
739*
740* ..
741* .. Intrinsic Functions ..
742 INTRINSIC min
743* ..
744* .. External Functions ..
745 INTEGER ICEIL
746 EXTERNAL iceil
747* ..
748* .. External Subroutines ..
749 EXTERNAL pbsvecadd
750* ..
751* .. Parameters ..
752 REAL ONE
753 parameter( one = 1.0e+0 )
754* ..
755* .. Local Variables ..
756 INTEGER IX, IY, JZ, K, ITER, LENX, LENY
757*
758 IF( jinx.EQ.1 .AND. jiny.EQ.1 ) THEN
759 CALL pbsvecadd( icontxt, trans, n, one, x, incx, beta,
760 $ y, incy )
761*
762 ELSE
763 ix = 0
764 iy = 0
765 jz = nz
766 lenx = nb * jinx
767 leny = nb * jiny
768 iter = iceil( n+nz, lenx )
769*
770 IF( iter.GT.1 ) THEN
771 CALL pbsvecadd( icontxt, trans, nb-jz, one, x(ix*incx+1),
772 $ incx, beta, y(iy*incy+1), incy )
773 ix = ix + lenx - jz
774 iy = iy + leny - jz
775 jz = 0
776*
777 DO 10 k = 2, iter-1
778 CALL pbsvecadd( icontxt, trans, nb, one, x(ix*incx+1),
779 $ incx, beta, y(iy*incy+1), incy )
780 ix = ix + lenx
781 iy = iy + leny
782 10 CONTINUE
783 END IF
784*
785 CALL pbsvecadd( icontxt, trans, min( n-ix, nb-jz ), one,
786 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
787 END IF
788*
789 RETURN
790*
791* End of PBSTR2B1
792*
793 END
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine sgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1072
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine sgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1113
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
subroutine pbstrget(icontxt, adist, m, n, mnb, a, lda, mcrow, mccol, igd, myrow, mycol, nprow, npcol)
Definition pbstrget.f:3
subroutine pbstr2a1(icontxt, n, nb, nz, x, incx, beta, y, incy, intv)
Definition pbstrnv.f:648
subroutine pbstr2b1(icontxt, trans, n, nb, nz, x, incx, beta, y, incy, jinx, jiny)
Definition pbstrnv.f:720
subroutine pbstrnv(icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
Definition pbstrnv.f:4
subroutine pbstrst1(icontxt, xdist, n, nb, nz, x, incx, beta, y, incy, lcmp, lcmq, nint)
Definition pbstrst1.f:3
subroutine pbsvecadd(icontxt, mode, n, alpha, x, incx, beta, y, incy)
Definition pbsvecadd.f:3