OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pbztran.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pbztran (icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
subroutine pbztr2at (icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq)
subroutine pbztr2bt (icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, intv)
subroutine pbztr2af (icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)

Function/Subroutine Documentation

◆ pbztr2af()

subroutine pbztr2af ( integer icontxt,
character*1 adist,
integer m,
integer n,
integer nb,
complex*16, dimension( lda, * ) a,
integer lda,
complex*16 beta,
complex*16, dimension( ldb, * ) b,
integer ldb,
integer lcmp,
integer lcmq,
integer nint )

Definition at line 791 of file pbztran.f.

793*
794* -- PB-BLAS routine (version 2.1) --
795* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
796* April 28, 1996
797*
798* .. Scalar Arguments ..
799 CHARACTER*1 ADIST
800 INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT
801 COMPLEX*16 BETA
802* ..
803* .. Array Arguments ..
804 COMPLEX*16 A( LDA, * ), B( LDB, * )
805* ..
806*
807* Purpose
808* =======
809*
810* PBZTR2AF forms T <== A + BETA*T, where T is a scattered block
811* row (or column) copied from a (condensed) block column (or row) of A
812*
813* =====================================================================
814*
815* .. Parameters ..
816 COMPLEX*16 ONE
817 parameter( one = ( 1.0d+0, 0.0d+0 ) )
818* ..
819* .. Local Scalars ..
820 INTEGER JA, JB, K, INTV
821* ..
822* .. External Functions ..
823 LOGICAL LSAME
824 INTEGER ICEIL
825 EXTERNAL lsame, iceil
826* ..
827* .. Intrinsic Functions ..
828 INTRINSIC min
829* ..
830* .. Executable Statements ..
831*
832 IF( lsame( adist, 'R' ) ) THEN
833 intv = nb * lcmq
834 ja = 1
835 jb = 1
836 DO 10 k = 1, iceil( nint, nb )
837 CALL pbzmatadd( icontxt, 'G', m, min( n-jb+1, nb ), one,
838 $ a(1,ja), lda, beta, b(1,jb), ldb )
839 ja = ja + nb
840 jb = jb + intv
841 10 CONTINUE
842*
843* if( LSAME( ADIST, 'C' ) ) then
844*
845 ELSE
846 intv = nb * lcmp
847 ja = 1
848 jb = 1
849 DO 20 k = 1, iceil( nint, nb )
850 CALL pbzmatadd( icontxt, 'G', min( m-jb+1, nb ), n, one,
851 $ a(ja,1), lda, beta, b(jb,1), ldb )
852 ja = ja + nb
853 jb = jb + intv
854 20 CONTINUE
855 END IF
856*
857 RETURN
858*
859* End of PBZTR2AF
860*
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function iceil(inum, idenom)
Definition iceil.f:2
#define min(a, b)
Definition macros.h:20
subroutine pbzmatadd(icontxt, mode, m, n, alpha, a, lda, beta, b, ldb)
Definition pbzmatadd.f:3

◆ pbztr2at()

subroutine pbztr2at ( integer icontxt,
character*1 adist,
character*1 trans,
integer m,
integer n,
integer nb,
complex*16, dimension( lda, * ) a,
integer lda,
complex*16 beta,
complex*16, dimension( ldb, * ) b,
integer ldb,
integer lcmp,
integer lcmq )

Definition at line 613 of file pbztran.f.

615*
616* -- PB-BLAS routine (version 2.1) --
617* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
618* April 28, 1996
619*
620* .. Scalar Arguments ..
621 CHARACTER*1 ADIST, TRANS
622 INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB
623 COMPLEX*16 BETA
624* ..
625* .. Array Arguments ..
626 COMPLEX*16 A( LDA, * ), B( LDB, * )
627* ..
628*
629* Purpose
630* =======
631*
632* PBZTR2AT forms B <== A^T + beta*B, or A^C + beta*B
633* B is a ((conjugate) transposed) scattered block row (or column),
634* copied from a scattered block column (or row) of A
635*
636* =====================================================================
637*
638* .. Parameters ..
639 COMPLEX*16 ONE
640 parameter( one = ( 1.0d+0, 0.0d+0 ) )
641* ..
642* .. Local Scalars ..
643 INTEGER IA, IB, K, INTV, JNTV
644* ..
645* .. External Subroutines ..
646 EXTERNAL pbzmatadd
647* ..
648* .. External Functions ..
649 LOGICAL LSAME
650 INTEGER ICEIL
651 EXTERNAL lsame, iceil
652* ..
653* .. Intrinsic Functions ..
654 INTRINSIC min
655* ..
656* .. Excutable Statements ..
657*
658 IF( lcmp.EQ.lcmq ) THEN
659 CALL pbzmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
660 $ ldb )
661*
662 ELSE
663*
664* If A is a column block ( ADIST = 'C' ),
665*
666 IF( lsame( adist, 'C' ) ) THEN
667 intv = lcmp * nb
668 jntv = lcmq * nb
669 ia = 1
670 ib = 1
671 DO 10 k = 1, iceil( m, intv )
672 CALL pbzmatadd( icontxt, trans, n, min( m-ia+1, nb ),
673 $ one, a(ia,1), lda, beta, b(1,ib), ldb )
674 ia = ia + intv
675 ib = ib + jntv
676 10 CONTINUE
677*
678* If A is a row block ( ADIST = 'R' ),
679*
680 ELSE
681 intv = lcmp * nb
682 jntv = lcmq * nb
683 ia = 1
684 ib = 1
685 DO 20 k = 1, iceil( n, jntv )
686 CALL pbzmatadd( icontxt, trans, min( n-ia+1, nb ), m,
687 $ one, a(1,ia), lda, beta, b(ib,1), ldb )
688 ia = ia + jntv
689 ib = ib + intv
690 20 CONTINUE
691 END IF
692 END IF
693*
694 RETURN
695*
696* End of PBZTR2AT
697*

◆ pbztr2bt()

subroutine pbztr2bt ( integer icontxt,
character*1 adist,
character*1 trans,
integer m,
integer n,
integer nb,
complex*16, dimension( lda, * ) a,
integer lda,
complex*16 beta,
complex*16, dimension( ldb, * ) b,
integer ldb,
integer intv )

Definition at line 704 of file pbztran.f.

706*
707* -- PB-BLAS routine (version 2.1) --
708* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
709* April 28, 1996
710*
711* .. Scalar Arguments ..
712 CHARACTER*1 ADIST, TRANS
713 INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB
714 COMPLEX*16 BETA
715* ..
716* .. Array Arguments ..
717 COMPLEX*16 A( LDA, * ), B( LDB, * )
718* ..
719*
720* Purpose
721* =======
722*
723* PBZTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a
724* ((conjugate) transposed) condensed block row (or column), copied from
725* a scattered block column (or row) of A
726*
727* =====================================================================
728*
729* .. Parameters ..
730 COMPLEX*16 ONE
731 parameter( one = ( 1.0d+0, 0.0d+0 ) )
732* ..
733* .. Local Scalars ..
734 INTEGER IA, IB, K
735* ..
736* .. External Functions ..
737 LOGICAL LSAME
738 INTEGER ICEIL
739 EXTERNAL lsame, iceil
740* ..
741* .. External Subroutines ..
742 EXTERNAL pbzmatadd
743* ..
744* .. Intrinsic Functions ..
745 INTRINSIC min
746* ..
747* .. Excutable Statements ..
748*
749 IF( intv.EQ.nb ) THEN
750 CALL pbzmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
751 $ ldb )
752*
753 ELSE
754*
755* If A is a column block ( ADIST = 'C' ),
756*
757 IF( lsame( adist, 'C' ) ) THEN
758 ia = 1
759 ib = 1
760 DO 10 k = 1, iceil( m, intv )
761 CALL pbzmatadd( icontxt, trans, n, min( m-ia+1, nb ),
762 $ one, a(ia,1), lda, beta, b(1,ib), ldb )
763 ia = ia + intv
764 ib = ib + nb
765 10 CONTINUE
766*
767* If A is a row block (ADIST = 'R'),
768*
769 ELSE
770 ia = 1
771 ib = 1
772 DO 20 k = 1, iceil( n, intv )
773 CALL pbzmatadd( icontxt, trans, min( n-ia+1, nb ), m,
774 $ one, a(1,ia), lda, beta, b(ib,1), ldb )
775 ia = ia + intv
776 ib = ib + nb
777 20 CONTINUE
778 END IF
779 END IF
780*
781 RETURN
782*
783* End of PBZTR2BT
784*

◆ pbztran()

subroutine pbztran ( integer icontxt,
character*1 adist,
character*1 trans,
integer m,
integer n,
integer nb,
complex*16, dimension( lda, * ) a,
integer lda,
complex*16 beta,
complex*16, dimension( ldc, * ) c,
integer ldc,
integer iarow,
integer iacol,
integer icrow,
integer iccol,
complex*16, dimension( * ) work )

Definition at line 1 of file pbztran.f.

3*
4* -- PB-BLAS routine (version 2.1) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
6* April 28, 1996
7*
8* Jaeyoung Choi, Oak Ridge National Laboratory
9* Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
10* David Walker, Oak Ridge National Laboratory
11*
12* .. Scalar Arguments ..
13 CHARACTER*1 ADIST, TRANS
14 INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC,
15 $ M, N, NB
16 COMPLEX*16 BETA
17* ..
18* .. Array Arguments ..
19 COMPLEX*16 A( LDA, * ), C( LDC, * ), WORK( * )
20* ..
21*
22* Purpose
23* =======
24*
25* PBZTRAN transposes a column block to row block, or a row block to
26* column block by reallocating data distribution.
27*
28* C := A^T + beta*C, or C := A^C + beta*C
29*
30* where A is an M-by-N matrix and C is an N-by-M matrix, and the size
31* of M or N is limited to its block size NB.
32*
33* The first elements of the matrices A, and C should be located at
34* the beginnings of their first blocks. (not the middle of the blocks.)
35*
36* Parameters
37* ==========
38*
39* ICONTXT (input) INTEGER
40* ICONTXT is the BLACS mechanism for partitioning communication
41* space. A defining property of a context is that a message in
42* a context cannot be sent or received in another context. The
43* BLACS context includes the definition of a grid, and each
44* process' coordinates in it.
45*
46* ADIST - (input) CHARACTER*1
47* ADIST specifies whether A is a column block or a row block.
48*
49* ADIST = 'C', A is a column block
50* ADIST = 'R', A is a row block
51*
52* TRANS - (input) CHARACTER*1
53* TRANS specifies whether the transposed format is transpose
54* or conjugate transpose. If the matrices A and C are real,
55* the argument is ignored.
56*
57* TRANS = 'T', transpose
58* TRANS = 'C', conjugate transpose
59*
60* M - (input) INTEGER
61* M specifies the (global) number of rows of the matrix (block
62* column or block row) A and of columns of the matrix C.
63* M >= 0.
64*
65* N - (input) INTEGER
66* N specifies the (global) number of columns of the matrix
67* (block column or block row) A and of columns of the matrix
68* C. N >= 0.
69*
70* NB - (input) INTEGER
71* NB specifies the column block size of the matrix A and the
72* row block size of the matrix C when ADIST = 'C'. Otherwise,
73* it specifies the row block size of the matrix A and the
74* column block size of the matrix C. NB >= 1.
75*
76* A (input) COMPLEX*16 array of DIMENSION ( LDA, Lx ),
77* where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'.
78* Before entry with ADIST = 'C', the leading Mp by N part of
79* the array A must contain the matrix A, otherwise the leading
80* M by Nq part of the array A must contain the matrix A. See
81* parameter details for the values of Mp and Nq.
82*
83* LDA (input) INTEGER
84* LDA specifies the leading dimension of (local) A as declared
85* in the calling (sub) program. LDA >= MAX(1,Mp) when
86* ADIST = 'C', or LDA >= MAX(1,M) otherwise.
87*
88* BETA (input) COMPLEX*16
89* BETA specifies scaler beta.
90*
91* C (input/output) COMPLEX*16 array of DIMENSION ( LDC, Lx ),
92* where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'.
93* If ADIST = 'C', the leading N-by-Mq part of the array C
94* contains the (local) matrix C, otherwise the leading
95* Np-by-M part of the array C must contain the (local) matrix
96* C. C will not be referenced if beta is zero.
97*
98* LDC (input) INTEGER
99* LDC specifies the leading dimension of (local) C as declared
100* in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C',
101* or LDC >= MAX(1,Np) otherwise.
102*
103* IAROW (input) INTEGER
104* IAROW specifies a row of the process template,
105* which holds the first block of the matrix A. If A is a row
106* of blocks (ADIST = 'R') and all rows of processes have a copy
107* of A, then set IAROW = -1.
108*
109* IACOL (input) INTEGER
110* IACOL specifies a column of the process template,
111* which holds the first block of the matrix A. If A is a
112* column of blocks (ADIST = 'C') and all columns of processes
113* have a copy of A, then set IACOL = -1.
114*
115* ICROW (input) INTEGER
116* ICROW specifies the current row process which holds
117* the first block of the matrix C, which is transposed of A.
118* If C is a row of blocks (ADIST = 'C') and the transposed
119* row block C is distributed all rows of processes, set
120* ICROW = -1.
121*
122* ICCOL (input) INTEGER
123* ICCOL specifies the current column process which holds
124* the first block of the matrix C, which is transposed of A.
125* If C is a column of blocks (ADIST = 'R') and the transposed
126* column block C is distributed all columns of processes,
127* set ICCOL = -1.
128*
129* WORK (workspace) COMPLEX*16 array of dimension Size(WORK).
130* It needs extra working space of A'.
131*
132* Parameters Details
133* ==================
134*
135* Lx It is a local portion of L owned by a process, (L is
136* replaced by M, or N, and x is replaced by either p (=NPROW)
137* or q (=NPCOL)). The value is determined by L, LB, x, and
138* MI, where LB is a block size and MI is a row or column
139* position in a process template. Lx is equal to or less
140* than Lx0 = CEIL( L, LB*x ) * LB.
141*
142* Communication Scheme
143* ====================
144*
145* The communication scheme of the routine is set to '1-tree', which is
146* fan-out. (For details, see BLACS user's guide.)
147*
148* Memory Requirement of WORK
149* ==========================
150*
151* Mqb = CEIL( M, NB*NPCOL )
152* Npb = CEIL( N, NB*NPROW )
153* LCMQ = LCM / NPCOL
154* LCMP = LCM / NPROW
155*
156* (1) ADIST = 'C'
157* (a) IACOL != -1
158* Size(WORK) = N * CEIL(Mqb,LCMQ)*NB
159* (b) IACOL = -1
160* Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB))
161*
162* (2) ADIST = 'R'
163* (a) IAROW != -1
164* Size(WORK) = M * CEIL(Npb,LCMP)*NB
165* (b) IAROW = -1
166* Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB))
167*
168* Notes
169* -----
170* More precise space can be computed as
171*
172* CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ )
173* CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP )
174*
175* =====================================================================
176*
177* ..
178* .. Parameters ..
179 COMPLEX*16 ONE, ZERO
180 parameter( one = ( 1.0d+0, 0.0d+0 ),
181 $ zero = ( 0.0d+0, 0.0d+0 ) )
182* ..
183* .. Local Scalars ..
184 LOGICAL COLFORM, ROWFORM
185 INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM,
186 $ LCMP, LCMQ, MCCOL, MCROW, ML, MP, MQ, MQ0,
187 $ MRCOL, MRROW, MYCOL, MYROW, NP, NP0, NPCOL,
188 $ NPROW, NQ
189 COMPLEX*16 TBETA
190* ..
191* .. External Functions ..
192 LOGICAL LSAME
193 INTEGER ILCM, ICEIL, NUMROC
194 EXTERNAL ilcm, iceil, lsame, numroc
195* ..
196* .. External Subroutines ..
199 $ zgebs2d, zgerv2d, zgesd2d
200* ..
201* .. Intrinsic Functions ..
202 INTRINSIC max, min, mod
203* ..
204* .. Executable Statements ..
205*
206* Quick return if possible.
207*
208 IF( m.EQ.0 .OR. n.EQ.0 ) RETURN
209*
210 CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
211*
212 colform = lsame( adist, 'C' )
213 rowform = lsame( adist, 'R' )
214*
215* Test the input parameters.
216*
217 info = 0
218 IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
219 info = 2
220 ELSE IF( m .LT.0 ) THEN
221 info = 4
222 ELSE IF( n .LT.0 ) THEN
223 info = 5
224 ELSE IF( nb.LT.1 ) THEN
225 info = 6
226 ELSE IF( iarow.LT.-1 .OR. iarow.GE.nprow .OR.
227 $ ( iarow.EQ.-1 .AND. colform ) ) THEN
228 info = 12
229 ELSE IF( iacol.LT.-1 .OR. iacol.GE.npcol .OR.
230 $ ( iacol.EQ.-1 .AND. rowform ) ) THEN
231 info = 13
232 ELSE IF( icrow.LT.-1 .OR. icrow.GE.nprow .OR.
233 $ ( icrow.EQ.-1 .AND. rowform ) ) THEN
234 info = 14
235 ELSE IF( iccol.LT.-1 .OR. iccol.GE.npcol .OR.
236 $ ( iccol.EQ.-1 .AND. colform ) ) THEN
237 info = 15
238 END IF
239*
240 10 CONTINUE
241 IF( info .NE. 0 ) THEN
242 CALL pxerbla( icontxt, 'PBZTRAN ', info )
243 RETURN
244 END IF
245*
246* Start the operations.
247*
248* LCM : the least common multiple of NPROW and NPCOL
249*
250 lcm = ilcm( nprow, npcol )
251 lcmp = lcm / nprow
252 lcmq = lcm / npcol
253 igd = npcol / lcmp
254*
255* When A is a column block
256*
257 IF( colform ) THEN
258*
259* Form C <== A' ( A is a column block )
260* _
261* | |
262* | |
263* _____________ | |
264* |______C______| <== |A|
265* | |
266* | |
267* |_|
268*
269* MRROW : row relative position in template from IAROW
270* MRCOL : column relative position in template from ICCOL
271*
272 mrrow = mod( nprow+myrow-iarow, nprow )
273 mrcol = mod( npcol+mycol-iccol, npcol )
274 jcrow = icrow
275 IF( icrow.EQ.-1 ) jcrow = iarow
276*
277 mp = numroc( m, nb, myrow, iarow, nprow )
278 mq = numroc( m, nb, mycol, iccol, npcol )
279 mq0 = numroc( numroc(m, nb, 0, 0, npcol), nb, 0, 0, lcmq )
280*
281 IF( lda.LT.mp .AND.
282 $ ( iacol.EQ.mycol .OR. iacol.EQ.-1 ) ) THEN
283 info = 8
284 ELSE IF( ldc.LT.n .AND.
285 $ ( icrow.EQ.myrow .OR. icrow.EQ.-1 ) ) THEN
286 info = 11
287 END IF
288 IF( info.NE.0 ) GO TO 10
289*
290* When a column process of IACOL has a column block A,
291*
292 IF( iacol.GE.0 ) THEN
293 tbeta = zero
294 IF( myrow.EQ.jcrow ) tbeta = beta
295*
296 DO 20 i = 0, min( lcm, iceil(m,nb) ) - 1
297 mcrow = mod( mod(i, nprow) + iarow, nprow )
298 mccol = mod( mod(i, npcol) + iccol, npcol )
299 IF( lcmq.EQ.1 ) mq0 = numroc( m, nb, i, 0, npcol )
300 jdex = (i/npcol) * nb
301*
302* A source node copies the blocks to WORK, and send it
303*
304 IF( myrow.EQ.mcrow .AND. mycol.EQ.iacol ) THEN
305*
306* The source node is a destination node
307*
308 idex = (i/nprow) * nb
309 IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
310 CALL pbztr2at( icontxt, 'col', TRANS, MP-IDEX, N, NB,
311 $ A(IDEX+1,1), LDA, TBETA, C(1,JDEX+1),
312 $ LDC, LCMP, LCMQ )
313*
314* The source node sends blocks to a destination node
315*
316 ELSE
317 CALL PBZTR2BT( ICONTXT, 'col', TRANS, MP-IDEX, N, NB,
318 $ A(IDEX+1,1), LDA, ZERO, WORK, N,
319 $ LCMP*NB )
320 CALL ZGESD2D( ICONTXT, N, MQ0, WORK, N, JCROW, MCCOL )
321 END IF
322*
323* A destination node receives the copied blocks
324*
325.EQ..AND..EQ. ELSE IF( MYROWJCROW MYCOLMCCOL ) THEN
326.EQ..AND..EQ. IF( LCMQ1 TBETAZERO ) THEN
327 CALL ZGERV2D( ICONTXT, N, MQ0, C, LDC, MCROW, IACOL )
328 ELSE
329 CALL ZGERV2D( ICONTXT, N, MQ0, WORK, N, MCROW, IACOL )
330 CALL PBZTR2AF( ICONTXT, 'row', N, MQ-JDEX, NB, WORK, N,
331 $ TBETA, C(1,JDEX+1), LDC, LCMP, LCMQ,
332 $ MQ0 )
333 END IF
334 END IF
335 20 CONTINUE
336*
337* Broadcast a row block of C in each column of template
338*
339.EQ. IF( ICROW-1 ) THEN
340.EQ. IF( MYROWJCROW ) THEN
341 CALL ZGEBS2D( ICONTXT, 'col', '1-tree', N, MQ, C, LDC )
342 ELSE
343 CALL ZGEBR2D( ICONTXT, 'col', '1-tree', N, MQ, C, LDC,
344 $ JCROW, MYCOL )
345 END IF
346 END IF
347*
348* When all column procesors have a copy of the column block A,
349*
350 ELSE
351.EQ. IF( LCMQ1 ) MQ0 = MQ
352*
353* Processors, which have diagonal blocks of A, copy them to
354* WORK array in transposed form
355*
356 DO 30 I = 0, LCMP-1
357.EQ. IF( MRCOLMOD( NPROW*I+MRROW, NPCOL ) ) THEN
358.EQ..AND..EQ..OR..EQ. IF( LCMQ1(ICROW-1ICROWMYROW) ) THEN
359 CALL PBZTR2BT( ICONTXT, 'col', TRANS, MP-I*NB, N, NB,
360 $ A(I*NB+1,1), LDA, BETA, C, LDC,
361 $ LCMP*NB )
362 ELSE
363 CALL PBZTR2BT( ICONTXT, 'col', TRANS, MP-I*NB, N, NB,
364 $ A(I*NB+1,1), LDA, ZERO, WORK, N,
365 $ LCMP*NB )
366 END IF
367 END IF
368 30 CONTINUE
369*
370* Get diagonal blocks of A for each column of the template
371*
372 MCROW = MOD( MOD(MRCOL,NPROW)+IAROW, NPROW )
373.GT. IF( LCMQ1 ) THEN
374 MCCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL )
375 CALL PBZTRGET( ICONTXT, 'row', N, MQ0, ICEIL(M,NB), WORK, N,
376 $ MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW,
377 $ NPCOL )
378 END IF
379*
380* Broadcast a row block of WORK in every row of template
381*
382.EQ. IF( ICROW-1 ) THEN
383.EQ. IF( MYROWMCROW ) THEN
384.GT. IF( LCMQ1 )
385 $ CALL PBZTRSRT( ICONTXT, 'row', N, MQ, NB, WORK, N, BETA,
386 $ C, LDC, LCMP, LCMQ, MQ0 )
387 CALL ZGEBS2D( ICONTXT, 'col', '1-tree', N, MQ, C, LDC )
388 ELSE
389 CALL ZGEBR2D( ICONTXT, 'col', '1-tree', N, MQ, C, LDC,
390 $ MCROW, MYCOL )
391 END IF
392*
393* Send a row block of WORK to the destination row
394*
395 ELSE
396.EQ. IF( LCMQ1 ) THEN
397.EQ. IF( MYROWMCROW ) THEN
398.NE. IF( MYROWICROW )
399 $ CALL ZGESD2D( ICONTXT, N, MQ, WORK, N, ICROW, MYCOL )
400.EQ. ELSE IF( MYROWICROW ) THEN
401.EQ. IF( BETAZERO ) THEN
402 CALL ZGERV2D( ICONTXT, N, MQ, C, LDC, MCROW, MYCOL )
403 ELSE
404 CALL ZGERV2D( ICONTXT, N, MQ, WORK, N, MCROW, MYCOL )
405 CALL PBZMATADD( ICONTXT, 'g', N, MQ, ONE, WORK, N,
406 $ BETA, C, LDC )
407 END IF
408 END IF
409*
410 ELSE
411 ML = MQ0 * MIN( LCMQ, MAX(0,ICEIL(M,NB)-MCCOL) )
412.EQ. IF( MYROWMCROW ) THEN
413.NE. IF( MYROWICROW )
414 $ CALL ZGESD2D( ICONTXT, N, ML, WORK, N, ICROW, MYCOL )
415.EQ. ELSE IF( MYROWICROW ) THEN
416 CALL ZGERV2D( ICONTXT, N, ML, WORK, N, MCROW, MYCOL )
417 END IF
418*
419.EQ. IF( MYROWICROW )
420 $ CALL PBZTRSRT( ICONTXT, 'row', N, MQ, NB, WORK, N, BETA,
421 $ C, LDC, LCMP, LCMQ, MQ0 )
422 END IF
423 END IF
424*
425 END IF
426*
427* When A is a row block
428*
429 ELSE
430*
431* Form C <== A' ( A is a row block )
432* _
433* | |
434* | |
435* | | _____________
436* |C| <== |______A______|
437* | |
438* | |
439* |_|
440*
441* MRROW : row relative position in template from ICROW
442* MRCOL : column relative position in template from IACOL
443*
444 MRROW = MOD( NPROW+MYROW-ICROW, NPROW )
445 MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL )
446 JCCOL = ICCOL
447.EQ. IF( ICCOL-1 ) JCCOL = IACOL
448*
449 NP = NUMROC( N, NB, MYROW, ICROW, NPROW )
450 NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL )
451 NP0 = NUMROC( NUMROC(N, NB, 0, 0, NPROW), NB, 0, 0, LCMP )
452*
453.LT..AND. IF( LDAM
454.EQ..OR..EQ. $ ( IAROWMYROW IAROW-1 ) ) THEN
455 INFO = 8
456.LT..AND. ELSE IF( LDCNP
457.EQ..OR..EQ. $ ( ICCOLMYCOL ICCOL-1 ) ) THEN
458 INFO = 11
459 END IF
460.NE. IF( INFO0 ) GO TO 10
461*
462* When a row process of IAROW has a row block A,
463*
464.GE. IF( IAROW0 ) THEN
465 TBETA = ZERO
466.EQ. IF( MYCOLJCCOL ) TBETA = BETA
467*
468 DO 40 I = 0, MIN( LCM, ICEIL(N,NB) ) - 1
469 MCROW = MOD( MOD(I, NPROW) + ICROW, NPROW )
470 MCCOL = MOD( MOD(I, NPCOL) + IACOL, NPCOL )
471.EQ. IF( LCMP1 ) NP0 = NUMROC( N, NB, I, 0, NPROW )
472 IDEX = (I/NPROW) * NB
473*
474* A source node copies the blocks to WORK, and send it
475*
476.EQ..AND..EQ. IF( MYROWIAROW MYCOLMCCOL ) THEN
477*
478* The source node is a destination node
479*
480 JDEX = (I/NPCOL) * NB
481.EQ..AND..EQ. IF( MYROWMCROW MYCOLJCCOL ) THEN
482 CALL PBZTR2AT( ICONTXT, 'row', TRANS, M, NQ-JDEX, NB,
483 $ A(1,JDEX+1), LDA, TBETA, C(IDEX+1,1),
484 $ LDC, LCMP, LCMQ )
485*
486* The source node sends blocks to a destination node
487*
488 ELSE
489 CALL PBZTR2BT( ICONTXT, 'row', TRANS, M, NQ-JDEX, NB,
490 $ A(1,JDEX+1), LDA, ZERO, WORK, NP0,
491 $ LCMQ*NB )
492 CALL ZGESD2D( ICONTXT, NP0, M, WORK, NP0,
493 $ MCROW, JCCOL )
494 END IF
495*
496* A destination node receives the copied blocks
497*
498.EQ..AND..EQ. ELSE IF( MYROWMCROW MYCOLJCCOL ) THEN
499.EQ..AND..EQ. IF( LCMP1 TBETAZERO ) THEN
500 CALL ZGERV2D( ICONTXT, NP0, M, C, LDC, IAROW, MCCOL )
501 ELSE
502 CALL ZGERV2D( ICONTXT, NP0, M, WORK, NP0, IAROW, MCCOL )
503 CALL PBZTR2AF( ICONTXT, 'col', NP-IDEX, M, NB, WORK,
504 $ NP0, TBETA, C(IDEX+1,1), LDC, LCMP, LCMQ,
505 $ NP0 )
506 END IF
507 END IF
508 40 CONTINUE
509*
510* Broadcast a column block of WORK in each row of template
511*
512.EQ. IF( ICCOL-1 ) THEN
513.EQ. IF( MYCOLJCCOL ) THEN
514 CALL ZGEBS2D( ICONTXT, 'row', '1-tree', NP, M, C, LDC )
515 ELSE
516 CALL ZGEBR2D( ICONTXT, 'row', '1-tree', NP, M, C, LDC,
517 $ MYROW, JCCOL )
518 END IF
519 END IF
520*
521* When all row procesors have a copy of the row block A,
522*
523 ELSE
524.EQ. IF( LCMP1 ) NP0 = NP
525*
526* Processors, which have diagonal blocks of A, copy them to
527* WORK array in transposed form
528*
529 DO 50 I = 0, LCMQ-1
530.EQ. IF( MRROWMOD(NPCOL*I+MRCOL, NPROW) ) THEN
531.EQ..AND..EQ..OR..EQ. IF( LCMP1(ICCOL-1ICCOLMYCOL) ) THEN
532 CALL PBZTR2BT( ICONTXT, 'row', TRANS, M, NQ-I*NB, NB,
533 $ A(1,I*NB+1), LDA, BETA, C, LDC,
534 $ LCMQ*NB )
535 ELSE
536 CALL PBZTR2BT( ICONTXT, 'row', TRANS, M, NQ-I*NB, NB,
537 $ A(1,I*NB+1), LDA, ZERO, WORK, NP0,
538 $ LCMQ*NB )
539 END IF
540 END IF
541 50 CONTINUE
542*
543* Get diagonal blocks of A for each row of the template
544*
545 MCCOL = MOD( MOD(MRROW, NPCOL)+IACOL, NPCOL )
546.GT. IF( LCMP1 ) THEN
547 MCROW = MOD( NPROW+MYROW-ICROW, NPROW )
548 CALL PBZTRGET( ICONTXT, 'col', NP0, M, ICEIL(N,NB), WORK,
549 $ NP0, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW,
550 $ NPCOL )
551 END IF
552*
553* Broadcast a column block of WORK in every column of template
554*
555.EQ. IF( ICCOL-1 ) THEN
556.EQ. IF( MYCOLMCCOL ) THEN
557.GT. IF( LCMP1 )
558 $ CALL PBZTRSRT( ICONTXT, 'col', np, m, nb, work, np0,
559 $ beta, c, ldc, lcmp, lcmq, np0 )
560 CALL zgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
561 ELSE
562 CALL zgebr2d( icontxt, 'row', '1-tree', NP, M, C, LDC,
563 $ MYROW, MCCOL )
564 END IF
565*
566* Send a column block of WORK to the destination column
567*
568 ELSE
569.EQ. IF( LCMP1 ) THEN
570.EQ. IF( MYCOLMCCOL ) THEN
571.NE. IF( MYCOLICCOL )
572 $ CALL ZGESD2D( ICONTXT, NP, M, WORK, NP, MYROW, ICCOL )
573.EQ. ELSE IF( MYCOLICCOL ) THEN
574.EQ. IF( BETAZERO ) THEN
575 CALL ZGERV2D( ICONTXT, NP, M, C, LDC, MYROW, MCCOL )
576 ELSE
577 CALL ZGERV2D( ICONTXT, NP, M, WORK, NP, MYROW, MCCOL )
578 CALL PBZMATADD( ICONTXT, 'g', NP, M, ONE, WORK, NP,
579 $ BETA, C, LDC )
580 END IF
581 END IF
582*
583 ELSE
584 ML = M * MIN( LCMP, MAX( 0, ICEIL(N,NB) - MCROW ) )
585.EQ. IF( MYCOLMCCOL ) THEN
586.NE. IF( MYCOLICCOL )
587 $ CALL ZGESD2D( ICONTXT, NP0, ML, WORK, NP0,
588 $ MYROW, ICCOL )
589.EQ. ELSE IF( MYCOLICCOL ) THEN
590 CALL ZGERV2D( ICONTXT, NP0, ML, WORK, NP0,
591 $ MYROW, MCCOL )
592 END IF
593*
594.EQ. IF( MYCOLICCOL )
595 $ CALL PBZTRSRT( ICONTXT, 'col', NP, M, NB, WORK, NP0,
596 $ BETA, C, LDC, LCMP, LCMQ, NP0 )
597 END IF
598 END IF
599*
600 END IF
601 END IF
602*
603 RETURN
604*
605* End of PBZTRAN
606*
integer function ilcm(m, n)
Definition ilcm.f:2
#define max(a, b)
Definition macros.h:21
subroutine zgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1092
subroutine zgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1051
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pbztr2af(icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)
Definition pbztran.f:793
subroutine pbztr2bt(icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, intv)
Definition pbztran.f:706
subroutine pbztr2at(icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq)
Definition pbztran.f:615
subroutine pbztrget(icontxt, adist, m, n, mnb, a, lda, mcrow, mccol, igd, myrow, mycol, nprow, npcol)
Definition pbztrget.f:3
subroutine pbztrsrt(icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)
Definition pbztrsrt.f:3