OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pbdtran.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pbdtran (icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
subroutine pbdtr2at (icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq)
subroutine pbdtr2bt (icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, intv)
subroutine pbdtr2af (icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)

Function/Subroutine Documentation

◆ pbdtr2af()

subroutine pbdtr2af ( integer icontxt,
character*1 adist,
integer m,
integer n,
integer nb,
double precision, dimension( lda, * ) a,
integer lda,
double precision beta,
double precision, dimension( ldb, * ) b,
integer ldb,
integer lcmp,
integer lcmq,
integer nint )

Definition at line 791 of file pbdtran.f.

793*
794* -- PB-BLAS routine (version 2.1) --
795* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
796* April 28, 1996
797*
798* .. Scalar Arguments ..
799 CHARACTER*1 ADIST
800 INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT
801 DOUBLE PRECISION BETA
802* ..
803* .. Array Arguments ..
804 DOUBLE PRECISION A( LDA, * ), B( LDB, * )
805* ..
806*
807* Purpose
808* =======
809*
810* PBDTR2AF forms T <== A + BETA*T, where T is a scattered block
811* row (or column) copied from a (condensed) block column (or row) of A
812*
813* =====================================================================
814*
815* .. Parameters ..
816 DOUBLE PRECISION ONE
817 parameter( one = 1.0d+0 )
818* ..
819* .. Local Scalars ..
820 INTEGER JA, JB, K, INTV
821* ..
822* .. External Functions ..
823 LOGICAL LSAME
824 INTEGER ICEIL
825 EXTERNAL lsame, iceil
826* ..
827* .. Intrinsic Functions ..
828 INTRINSIC min
829* ..
830* .. Executable Statements ..
831*
832 IF( lsame( adist, 'R' ) ) THEN
833 intv = nb * lcmq
834 ja = 1
835 jb = 1
836 DO 10 k = 1, iceil( nint, nb )
837 CALL pbdmatadd( icontxt, 'G', m, min( n-jb+1, nb ), one,
838 $ a(1,ja), lda, beta, b(1,jb), ldb )
839 ja = ja + nb
840 jb = jb + intv
841 10 CONTINUE
842*
843* if( LSAME( ADIST, 'C' ) ) then
844*
845 ELSE
846 intv = nb * lcmp
847 ja = 1
848 jb = 1
849 DO 20 k = 1, iceil( nint, nb )
850 CALL pbdmatadd( icontxt, 'G', min( m-jb+1, nb ), n, one,
851 $ a(ja,1), lda, beta, b(jb,1), ldb )
852 ja = ja + nb
853 jb = jb + intv
854 20 CONTINUE
855 END IF
856*
857 RETURN
858*
859* End of PBDTR2AF
860*
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function iceil(inum, idenom)
Definition iceil.f:2
#define min(a, b)
Definition macros.h:20
subroutine pbdmatadd(icontxt, mode, m, n, alpha, a, lda, beta, b, ldb)
Definition pbdmatadd.f:3

◆ pbdtr2at()

subroutine pbdtr2at ( integer icontxt,
character*1 adist,
character*1 trans,
integer m,
integer n,
integer nb,
double precision, dimension( lda, * ) a,
integer lda,
double precision beta,
double precision, dimension( ldb, * ) b,
integer ldb,
integer lcmp,
integer lcmq )

Definition at line 613 of file pbdtran.f.

615*
616* -- PB-BLAS routine (version 2.1) --
617* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
618* April 28, 1996
619*
620* .. Scalar Arguments ..
621 CHARACTER*1 ADIST, TRANS
622 INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB
623 DOUBLE PRECISION BETA
624* ..
625* .. Array Arguments ..
626 DOUBLE PRECISION A( LDA, * ), B( LDB, * )
627* ..
628*
629* Purpose
630* =======
631*
632* PBDTR2AT forms B <== A^T + beta*B, or A^C + beta*B
633* B is a ((conjugate) transposed) scattered block row (or column),
634* copied from a scattered block column (or row) of A
635*
636* =====================================================================
637*
638* .. Parameters ..
639 DOUBLE PRECISION ONE
640 parameter( one = 1.0d+0 )
641* ..
642* .. Local Scalars ..
643 INTEGER IA, IB, K, INTV, JNTV
644* ..
645* .. External Subroutines ..
646 EXTERNAL pbdmatadd
647* ..
648* .. External Functions ..
649 LOGICAL LSAME
650 INTEGER ICEIL
651 EXTERNAL lsame, iceil
652* ..
653* .. Intrinsic Functions ..
654 INTRINSIC min
655* ..
656* .. Excutable Statements ..
657*
658 IF( lcmp.EQ.lcmq ) THEN
659 CALL pbdmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
660 $ ldb )
661*
662 ELSE
663*
664* If A is a column block ( ADIST = 'C' ),
665*
666 IF( lsame( adist, 'C' ) ) THEN
667 intv = lcmp * nb
668 jntv = lcmq * nb
669 ia = 1
670 ib = 1
671 DO 10 k = 1, iceil( m, intv )
672 CALL pbdmatadd( icontxt, trans, n, min( m-ia+1, nb ),
673 $ one, a(ia,1), lda, beta, b(1,ib), ldb )
674 ia = ia + intv
675 ib = ib + jntv
676 10 CONTINUE
677*
678* If A is a row block ( ADIST = 'R' ),
679*
680 ELSE
681 intv = lcmp * nb
682 jntv = lcmq * nb
683 ia = 1
684 ib = 1
685 DO 20 k = 1, iceil( n, jntv )
686 CALL pbdmatadd( icontxt, trans, min( n-ia+1, nb ), m,
687 $ one, a(1,ia), lda, beta, b(ib,1), ldb )
688 ia = ia + jntv
689 ib = ib + intv
690 20 CONTINUE
691 END IF
692 END IF
693*
694 RETURN
695*
696* End of PBDTR2AT
697*

◆ pbdtr2bt()

subroutine pbdtr2bt ( integer icontxt,
character*1 adist,
character*1 trans,
integer m,
integer n,
integer nb,
double precision, dimension( lda, * ) a,
integer lda,
double precision beta,
double precision, dimension( ldb, * ) b,
integer ldb,
integer intv )

Definition at line 704 of file pbdtran.f.

706*
707* -- PB-BLAS routine (version 2.1) --
708* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
709* April 28, 1996
710*
711* .. Scalar Arguments ..
712 CHARACTER*1 ADIST, TRANS
713 INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB
714 DOUBLE PRECISION BETA
715* ..
716* .. Array Arguments ..
717 DOUBLE PRECISION A( LDA, * ), B( LDB, * )
718* ..
719*
720* Purpose
721* =======
722*
723* PBDTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a
724* ((conjugate) transposed) condensed block row (or column), copied from
725* a scattered block column (or row) of A
726*
727* =====================================================================
728*
729* .. Parameters ..
730 DOUBLE PRECISION ONE
731 parameter( one = 1.0d+0 )
732* ..
733* .. Local Scalars ..
734 INTEGER IA, IB, K
735* ..
736* .. External Functions ..
737 LOGICAL LSAME
738 INTEGER ICEIL
739 EXTERNAL lsame, iceil
740* ..
741* .. External Subroutines ..
742 EXTERNAL pbdmatadd
743* ..
744* .. Intrinsic Functions ..
745 INTRINSIC min
746* ..
747* .. Excutable Statements ..
748*
749 IF( intv.EQ.nb ) THEN
750 CALL pbdmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
751 $ ldb )
752*
753 ELSE
754*
755* If A is a column block ( ADIST = 'C' ),
756*
757 IF( lsame( adist, 'C' ) ) THEN
758 ia = 1
759 ib = 1
760 DO 10 k = 1, iceil( m, intv )
761 CALL pbdmatadd( icontxt, trans, n, min( m-ia+1, nb ),
762 $ one, a(ia,1), lda, beta, b(1,ib), ldb )
763 ia = ia + intv
764 ib = ib + nb
765 10 CONTINUE
766*
767* If A is a row block (ADIST = 'R'),
768*
769 ELSE
770 ia = 1
771 ib = 1
772 DO 20 k = 1, iceil( n, intv )
773 CALL pbdmatadd( icontxt, trans, min( n-ia+1, nb ), m,
774 $ one, a(1,ia), lda, beta, b(ib,1), ldb )
775 ia = ia + intv
776 ib = ib + nb
777 20 CONTINUE
778 END IF
779 END IF
780*
781 RETURN
782*
783* End of PBDTR2BT
784*

◆ pbdtran()

subroutine pbdtran ( integer icontxt,
character*1 adist,
character*1 trans,
integer m,
integer n,
integer nb,
double precision, dimension( lda, * ) a,
integer lda,
double precision beta,
double precision, dimension( ldc, * ) c,
integer ldc,
integer iarow,
integer iacol,
integer icrow,
integer iccol,
double precision, dimension( * ) work )

Definition at line 1 of file pbdtran.f.

3*
4* -- PB-BLAS routine (version 2.1) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
6* April 28, 1996
7*
8* Jaeyoung Choi, Oak Ridge National Laboratory
9* Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
10* David Walker, Oak Ridge National Laboratory
11*
12* .. Scalar Arguments ..
13 CHARACTER*1 ADIST, TRANS
14 INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC,
15 $ M, N, NB
16 DOUBLE PRECISION BETA
17* ..
18* .. Array Arguments ..
19 DOUBLE PRECISION A( LDA, * ), C( LDC, * ), WORK( * )
20* ..
21*
22* Purpose
23* =======
24*
25* PBDTRAN transposes a column block to row block, or a row block to
26* column block by reallocating data distribution.
27*
28* C := A^T + beta*C, or C := A^C + beta*C
29*
30* where A is an M-by-N matrix and C is an N-by-M matrix, and the size
31* of M or N is limited to its block size NB.
32*
33* The first elements of the matrices A, and C should be located at
34* the beginnings of their first blocks. (not the middle of the blocks.)
35*
36* Parameters
37* ==========
38*
39* ICONTXT (input) INTEGER
40* ICONTXT is the BLACS mechanism for partitioning communication
41* space. A defining property of a context is that a message in
42* a context cannot be sent or received in another context. The
43* BLACS context includes the definition of a grid, and each
44* process' coordinates in it.
45*
46* ADIST - (input) CHARACTER*1
47* ADIST specifies whether A is a column block or a row block.
48*
49* ADIST = 'C', A is a column block
50* ADIST = 'R', A is a row block
51*
52* TRANS - (input) CHARACTER*1
53* TRANS specifies whether the transposed format is transpose
54* or conjugate transpose. If the matrices A and C are real,
55* the argument is ignored.
56*
57* TRANS = 'T', transpose
58* TRANS = 'C', conjugate transpose
59*
60* M - (input) INTEGER
61* M specifies the (global) number of rows of the matrix (block
62* column or block row) A and of columns of the matrix C.
63* M >= 0.
64*
65* N - (input) INTEGER
66* N specifies the (global) number of columns of the matrix
67* (block column or block row) A and of columns of the matrix
68* C. N >= 0.
69*
70* NB - (input) INTEGER
71* NB specifies the column block size of the matrix A and the
72* row block size of the matrix C when ADIST = 'C'. Otherwise,
73* it specifies the row block size of the matrix A and the
74* column block size of the matrix C. NB >= 1.
75*
76* A (input) DOUBLE PRECISION array of DIMENSION ( LDA, Lx ),
77* where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'.
78* Before entry with ADIST = 'C', the leading Mp by N part of
79* the array A must contain the matrix A, otherwise the leading
80* M by Nq part of the array A must contain the matrix A. See
81* parameter details for the values of Mp and Nq.
82*
83* LDA (input) INTEGER
84* LDA specifies the leading dimension of (local) A as declared
85* in the calling (sub) program. LDA >= MAX(1,Mp) when
86* ADIST = 'C', or LDA >= MAX(1,M) otherwise.
87*
88* BETA (input) DOUBLE PRECISION
89* BETA specifies scaler beta.
90*
91* C (input/output) DOUBLE PRECISION array of DIMENSION
92* ( LDC, Lx ),
93* where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'.
94* If ADIST = 'C', the leading N-by-Mq part of the array C
95* contains the (local) matrix C, otherwise the leading
96* Np-by-M part of the array C must contain the (local) matrix
97* C. C will not be referenced if beta is zero.
98*
99* LDC (input) INTEGER
100* LDC specifies the leading dimension of (local) C as declared
101* in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C',
102* or LDC >= MAX(1,Np) otherwise.
103*
104* IAROW (input) INTEGER
105* IAROW specifies a row of the process template,
106* which holds the first block of the matrix A. If A is a row
107* of blocks (ADIST = 'R') and all rows of processes have a copy
108* of A, then set IAROW = -1.
109*
110* IACOL (input) INTEGER
111* IACOL specifies a column of the process template,
112* which holds the first block of the matrix A. If A is a
113* column of blocks (ADIST = 'C') and all columns of processes
114* have a copy of A, then set IACOL = -1.
115*
116* ICROW (input) INTEGER
117* ICROW specifies the current row process which holds
118* the first block of the matrix C, which is transposed of A.
119* If C is a row of blocks (ADIST = 'C') and the transposed
120* row block C is distributed all rows of processes, set
121* ICROW = -1.
122*
123* ICCOL (input) INTEGER
124* ICCOL specifies the current column process which holds
125* the first block of the matrix C, which is transposed of A.
126* If C is a column of blocks (ADIST = 'R') and the transposed
127* column block C is distributed all columns of processes,
128* set ICCOL = -1.
129*
130* WORK (workspace) DOUBLE PRECISION array of dimension Size(WORK).
131* It needs extra working space of A'.
132*
133* Parameters Details
134* ==================
135*
136* Lx It is a local portion of L owned by a process, (L is
137* replaced by M, or N, and x is replaced by either p (=NPROW)
138* or q (=NPCOL)). The value is determined by L, LB, x, and
139* MI, where LB is a block size and MI is a row or column
140* position in a process template. Lx is equal to or less
141* than Lx0 = CEIL( L, LB*x ) * LB.
142*
143* Communication Scheme
144* ====================
145*
146* The communication scheme of the routine is set to '1-tree', which is
147* fan-out. (For details, see BLACS user's guide.)
148*
149* Memory Requirement of WORK
150* ==========================
151*
152* Mqb = CEIL( M, NB*NPCOL )
153* Npb = CEIL( N, NB*NPROW )
154* LCMQ = LCM / NPCOL
155* LCMP = LCM / NPROW
156*
157* (1) ADIST = 'C'
158* (a) IACOL != -1
159* Size(WORK) = N * CEIL(Mqb,LCMQ)*NB
160* (b) IACOL = -1
161* Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB))
162*
163* (2) ADIST = 'R'
164* (a) IAROW != -1
165* Size(WORK) = M * CEIL(Npb,LCMP)*NB
166* (b) IAROW = -1
167* Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB))
168*
169* Notes
170* -----
171* More precise space can be computed as
172*
173* CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ )
174* CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP )
175*
176* =====================================================================
177*
178* ..
179* .. Parameters ..
180 DOUBLE PRECISION ONE, ZERO
181 parameter( one = 1.0d+0, zero = 0.0d+0 )
182* ..
183* .. Local Scalars ..
184 LOGICAL COLFORM, ROWFORM
185 INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM,
186 $ LCMP, LCMQ, MCCOL, MCROW, ML, MP, MQ, MQ0,
187 $ MRCOL, MRROW, MYCOL, MYROW, NP, NP0, NPCOL,
188 $ NPROW, NQ
189 DOUBLE PRECISION TBETA
190* ..
191* .. External Functions ..
192 LOGICAL LSAME
193 INTEGER ILCM, ICEIL, NUMROC
194 EXTERNAL ilcm, iceil, lsame, numroc
195* ..
196* .. External Subroutines ..
197 EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d, dgerv2d,
198 $ dgesd2d, pbdmatadd, pbdtr2af, pbdtr2at,
200* ..
201* .. Intrinsic Functions ..
202 INTRINSIC max, min, mod
203* ..
204* .. Executable Statements ..
205*
206* Quick return if possible.
207*
208 IF( m.EQ.0 .OR. n.EQ.0 ) RETURN
209*
210 CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
211*
212 colform = lsame( adist, 'C' )
213 rowform = lsame( adist, 'R' )
214*
215* Test the input parameters.
216*
217 info = 0
218 IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
219 info = 2
220 ELSE IF( m .LT.0 ) THEN
221 info = 4
222 ELSE IF( n .LT.0 ) THEN
223 info = 5
224 ELSE IF( nb.LT.1 ) THEN
225 info = 6
226 ELSE IF( iarow.LT.-1 .OR. iarow.GE.nprow .OR.
227 $ ( iarow.EQ.-1 .AND. colform ) ) THEN
228 info = 12
229 ELSE IF( iacol.LT.-1 .OR. iacol.GE.npcol .OR.
230 $ ( iacol.EQ.-1 .AND. rowform ) ) THEN
231 info = 13
232 ELSE IF( icrow.LT.-1 .OR. icrow.GE.nprow .OR.
233 $ ( icrow.EQ.-1 .AND. rowform ) ) THEN
234 info = 14
235 ELSE IF( iccol.LT.-1 .OR. iccol.GE.npcol .OR.
236 $ ( iccol.EQ.-1 .AND. colform ) ) THEN
237 info = 15
238 END IF
239*
240 10 CONTINUE
241 IF( info .NE. 0 ) THEN
242 CALL pxerbla( icontxt, 'PBDTRAN ', info )
243 RETURN
244 END IF
245*
246* Start the operations.
247*
248* LCM : the least common multiple of NPROW and NPCOL
249*
250 lcm = ilcm( nprow, npcol )
251 lcmp = lcm / nprow
252 lcmq = lcm / npcol
253 igd = npcol / lcmp
254*
255* When A is a column block
256*
257 IF( colform ) THEN
258*
259* Form C <== A' ( A is a column block )
260* _
261* | |
262* | |
263* _____________ | |
264* |______C______| <== |A|
265* | |
266* | |
267* |_|
268*
269* MRROW : row relative position in template from IAROW
270* MRCOL : column relative position in template from ICCOL
271*
272 mrrow = mod( nprow+myrow-iarow, nprow )
273 mrcol = mod( npcol+mycol-iccol, npcol )
274 jcrow = icrow
275 IF( icrow.EQ.-1 ) jcrow = iarow
276*
277 mp = numroc( m, nb, myrow, iarow, nprow )
278 mq = numroc( m, nb, mycol, iccol, npcol )
279 mq0 = numroc( numroc(m, nb, 0, 0, npcol), nb, 0, 0, lcmq )
280*
281 IF( lda.LT.mp .AND.
282 $ ( iacol.EQ.mycol .OR. iacol.EQ.-1 ) ) THEN
283 info = 8
284 ELSE IF( ldc.LT.n .AND.
285 $ ( icrow.EQ.myrow .OR. icrow.EQ.-1 ) ) THEN
286 info = 11
287 END IF
288 IF( info.NE.0 ) GO TO 10
289*
290* When a column process of IACOL has a column block A,
291*
292 IF( iacol.GE.0 ) THEN
293 tbeta = zero
294 IF( myrow.EQ.jcrow ) tbeta = beta
295*
296 DO 20 i = 0, min( lcm, iceil(m,nb) ) - 1
297 mcrow = mod( mod(i, nprow) + iarow, nprow )
298 mccol = mod( mod(i, npcol) + iccol, npcol )
299 IF( lcmq.EQ.1 ) mq0 = numroc( m, nb, i, 0, npcol )
300 jdex = (i/npcol) * nb
301*
302* A source node copies the blocks to WORK, and send it
303*
304 IF( myrow.EQ.mcrow .AND. mycol.EQ.iacol ) THEN
305*
306* The source node is a destination node
307*
308 idex = (i/nprow) * nb
309 IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
310 CALL pbdtr2at( icontxt, 'Col', trans, mp-idex, n, nb,
311 $ a(idex+1,1), lda, tbeta, c(1,jdex+1),
312 $ ldc, lcmp, lcmq )
313*
314* The source node sends blocks to a destination node
315*
316 ELSE
317 CALL pbdtr2bt( icontxt, 'Col', trans, mp-idex, n, nb,
318 $ a(idex+1,1), lda, zero, work, n,
319 $ lcmp*nb )
320 CALL dgesd2d( icontxt, n, mq0, work, n, jcrow, mccol )
321 END IF
322*
323* A destination node receives the copied blocks
324*
325 ELSE IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
326 IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
327 CALL dgerv2d( icontxt, n, mq0, c, ldc, mcrow, iacol )
328 ELSE
329 CALL dgerv2d( icontxt, n, mq0, work, n, mcrow, iacol )
330 CALL pbdtr2af( icontxt, 'Row', n, mq-jdex, nb, work, n,
331 $ tbeta, c(1,jdex+1), ldc, lcmp, lcmq,
332 $ mq0 )
333 END IF
334 END IF
335 20 CONTINUE
336*
337* Broadcast a row block of C in each column of template
338*
339 IF( icrow.EQ.-1 ) THEN
340 IF( myrow.EQ.jcrow ) THEN
341 CALL dgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
342 ELSE
343 CALL dgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
344 $ jcrow, mycol )
345 END IF
346 END IF
347*
348* When all column procesors have a copy of the column block A,
349*
350 ELSE
351 IF( lcmq.EQ.1 ) mq0 = mq
352*
353* Processors, which have diagonal blocks of A, copy them to
354* WORK array in transposed form
355*
356 DO 30 i = 0, lcmp-1
357 IF( mrcol.EQ.mod( nprow*i+mrrow, npcol ) ) THEN
358 IF( lcmq.EQ.1.AND.(icrow.EQ.-1.OR.icrow.EQ.myrow) ) THEN
359 CALL pbdtr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
360 $ a(i*nb+1,1), lda, beta, c, ldc,
361 $ lcmp*nb )
362 ELSE
363 CALL pbdtr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
364 $ a(i*nb+1,1), lda, zero, work, n,
365 $ lcmp*nb )
366 END IF
367 END IF
368 30 CONTINUE
369*
370* Get diagonal blocks of A for each column of the template
371*
372 mcrow = mod( mod(mrcol,nprow)+iarow, nprow )
373 IF( lcmq.GT.1 ) THEN
374 mccol = mod( npcol+mycol-iccol, npcol )
375 CALL pbdtrget( icontxt, 'Row', n, mq0, iceil(m,nb), work, n,
376 $ mcrow, mccol, igd, myrow, mycol, nprow,
377 $ npcol )
378 END IF
379*
380* Broadcast a row block of WORK in every row of template
381*
382 IF( icrow.EQ.-1 ) THEN
383 IF( myrow.EQ.mcrow ) THEN
384 IF( lcmq.GT.1 )
385 $ CALL pbdtrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
386 $ c, ldc, lcmp, lcmq, mq0 )
387 CALL dgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
388 ELSE
389 CALL dgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
390 $ mcrow, mycol )
391 END IF
392*
393* Send a row block of WORK to the destination row
394*
395 ELSE
396 IF( lcmq.EQ.1 ) THEN
397 IF( myrow.EQ.mcrow ) THEN
398 IF( myrow.NE.icrow )
399 $ CALL dgesd2d( icontxt, n, mq, work, n, icrow, mycol )
400 ELSE IF( myrow.EQ.icrow ) THEN
401 IF( beta.EQ.zero ) THEN
402 CALL dgerv2d( icontxt, n, mq, c, ldc, mcrow, mycol )
403 ELSE
404 CALL dgerv2d( icontxt, n, mq, work, n, mcrow, mycol )
405 CALL pbdmatadd( icontxt, 'G', n, mq, one, work, n,
406 $ beta, c, ldc )
407 END IF
408 END IF
409*
410 ELSE
411 ml = mq0 * min( lcmq, max(0,iceil(m,nb)-mccol) )
412 IF( myrow.EQ.mcrow ) THEN
413 IF( myrow.NE.icrow )
414 $ CALL dgesd2d( icontxt, n, ml, work, n, icrow, mycol )
415 ELSE IF( myrow.EQ.icrow ) THEN
416 CALL dgerv2d( icontxt, n, ml, work, n, mcrow, mycol )
417 END IF
418*
419 IF( myrow.EQ.icrow )
420 $ CALL pbdtrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
421 $ c, ldc, lcmp, lcmq, mq0 )
422 END IF
423 END IF
424*
425 END IF
426*
427* When A is a row block
428*
429 ELSE
430*
431* Form C <== A' ( A is a row block )
432* _
433* | |
434* | |
435* | | _____________
436* |C| <== |______A______|
437* | |
438* | |
439* |_|
440*
441* MRROW : row relative position in template from ICROW
442* MRCOL : column relative position in template from IACOL
443*
444 mrrow = mod( nprow+myrow-icrow, nprow )
445 mrcol = mod( npcol+mycol-iacol, npcol )
446 jccol = iccol
447 IF( iccol.EQ.-1 ) jccol = iacol
448*
449 np = numroc( n, nb, myrow, icrow, nprow )
450 nq = numroc( n, nb, mycol, iacol, npcol )
451 np0 = numroc( numroc(n, nb, 0, 0, nprow), nb, 0, 0, lcmp )
452*
453 IF( lda.LT.m .AND.
454 $ ( iarow.EQ.myrow .OR. iarow.EQ.-1 ) ) THEN
455 info = 8
456 ELSE IF( ldc.LT.np .AND.
457 $ ( iccol.EQ.mycol .OR. iccol.EQ.-1 ) ) THEN
458 info = 11
459 END IF
460 IF( info.NE.0 ) GO TO 10
461*
462* When a row process of IAROW has a row block A,
463*
464 IF( iarow.GE.0 ) THEN
465 tbeta = zero
466 IF( mycol.EQ.jccol ) tbeta = beta
467*
468 DO 40 i = 0, min( lcm, iceil(n,nb) ) - 1
469 mcrow = mod( mod(i, nprow) + icrow, nprow )
470 mccol = mod( mod(i, npcol) + iacol, npcol )
471 IF( lcmp.EQ.1 ) np0 = numroc( n, nb, i, 0, nprow )
472 idex = (i/nprow) * nb
473*
474* A source node copies the blocks to WORK, and send it
475*
476 IF( myrow.EQ.iarow .AND. mycol.EQ.mccol ) THEN
477*
478* The source node is a destination node
479*
480 jdex = (i/npcol) * nb
481 IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
482 CALL pbdtr2at( icontxt, 'Row', trans, m, nq-jdex, nb,
483 $ a(1,jdex+1), lda, tbeta, c(idex+1,1),
484 $ ldc, lcmp, lcmq )
485*
486* The source node sends blocks to a destination node
487*
488 ELSE
489 CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-jdex, nb,
490 $ a(1,jdex+1), lda, zero, work, np0,
491 $ lcmq*nb )
492 CALL dgesd2d( icontxt, np0, m, work, np0,
493 $ mcrow, jccol )
494 END IF
495*
496* A destination node receives the copied blocks
497*
498 ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
499 IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
500 CALL dgerv2d( icontxt, np0, m, c, ldc, iarow, mccol )
501 ELSE
502 CALL dgerv2d( icontxt, np0, m, work, np0, iarow, mccol )
503 CALL pbdtr2af( icontxt, 'Col', np-idex, m, nb, work,
504 $ np0, tbeta, c(idex+1,1), ldc, lcmp, lcmq,
505 $ np0 )
506 END IF
507 END IF
508 40 CONTINUE
509*
510* Broadcast a column block of WORK in each row of template
511*
512 IF( iccol.EQ.-1 ) THEN
513 IF( mycol.EQ.jccol ) THEN
514 CALL dgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
515 ELSE
516 CALL dgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
517 $ myrow, jccol )
518 END IF
519 END IF
520*
521* When all row procesors have a copy of the row block A,
522*
523 ELSE
524 IF( lcmp.EQ.1 ) np0 = np
525*
526* Processors, which have diagonal blocks of A, copy them to
527* WORK array in transposed form
528*
529 DO 50 i = 0, lcmq-1
530 IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
531 IF( lcmp.EQ.1.AND.(iccol.EQ.-1.OR.iccol.EQ.mycol) ) THEN
532 CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
533 $ a(1,i*nb+1), lda, beta, c, ldc,
534 $ lcmq*nb )
535 ELSE
536 CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
537 $ a(1,i*nb+1), lda, zero, work, np0,
538 $ lcmq*nb )
539 END IF
540 END IF
541 50 CONTINUE
542*
543* Get diagonal blocks of A for each row of the template
544*
545 mccol = mod( mod(mrrow, npcol)+iacol, npcol )
546 IF( lcmp.GT.1 ) THEN
547 mcrow = mod( nprow+myrow-icrow, nprow )
548 CALL pbdtrget( icontxt, 'Col', np0, m, iceil(n,nb), work,
549 $ np0, mcrow, mccol, igd, myrow, mycol, nprow,
550 $ npcol )
551 END IF
552*
553* Broadcast a column block of WORK in every column of template
554*
555 IF( iccol.EQ.-1 ) THEN
556 IF( mycol.EQ.mccol ) THEN
557 IF( lcmp.GT.1 )
558 $ CALL pbdtrsrt( icontxt, 'Col', np, m, nb, work, np0,
559 $ beta, c, ldc, lcmp, lcmq, np0 )
560 CALL dgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
561 ELSE
562 CALL dgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
563 $ myrow, mccol )
564 END IF
565*
566* Send a column block of WORK to the destination column
567*
568 ELSE
569 IF( lcmp.EQ.1 ) THEN
570 IF( mycol.EQ.mccol ) THEN
571 IF( mycol.NE.iccol )
572 $ CALL dgesd2d( icontxt, np, m, work, np, myrow, iccol )
573 ELSE IF( mycol.EQ.iccol ) THEN
574 IF( beta.EQ.zero ) THEN
575 CALL dgerv2d( icontxt, np, m, c, ldc, myrow, mccol )
576 ELSE
577 CALL dgerv2d( icontxt, np, m, work, np, myrow, mccol )
578 CALL pbdmatadd( icontxt, 'G', np, m, one, work, np,
579 $ beta, c, ldc )
580 END IF
581 END IF
582*
583 ELSE
584 ml = m * min( lcmp, max( 0, iceil(n,nb) - mcrow ) )
585 IF( mycol.EQ.mccol ) THEN
586 IF( mycol.NE.iccol )
587 $ CALL dgesd2d( icontxt, np0, ml, work, np0,
588 $ myrow, iccol )
589 ELSE IF( mycol.EQ.iccol ) THEN
590 CALL dgerv2d( icontxt, np0, ml, work, np0,
591 $ myrow, mccol )
592 END IF
593*
594 IF( mycol.EQ.iccol )
595 $ CALL pbdtrsrt( icontxt, 'Col', np, m, nb, work, np0,
596 $ beta, c, ldc, lcmp, lcmq, np0 )
597 END IF
598 END IF
599*
600 END IF
601 END IF
602*
603 RETURN
604*
605* End of PBDTRAN
606*
integer function ilcm(m, n)
Definition ilcm.f:2
#define max(a, b)
Definition macros.h:21
subroutine dgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1082
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine dgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1123
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pbdtr2bt(icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, intv)
Definition pbdtran.f:706
subroutine pbdtr2af(icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)
Definition pbdtran.f:793
subroutine pbdtr2at(icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq)
Definition pbdtran.f:615
subroutine pbdtrget(icontxt, adist, m, n, mnb, a, lda, mcrow, mccol, igd, myrow, mycol, nprow, npcol)
Definition pbdtrget.f:3
subroutine pbdtrsrt(icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)
Definition pbdtrsrt.f:3