OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pdlarzb.f
Go to the documentation of this file.
1 SUBROUTINE pdlarzb( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V,
2 $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER DIRECT, SIDE, STOREV, TRANS
10 INTEGER IC, IV, JC, JV, K, L, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCC( * ), DESCV( * )
14 DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PDLARZB applies a real block reflector Q or its transpose Q**T to
21* a real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1)
22* from the left or the right.
23*
24* Q is a product of k elementary reflectors as returned by PDTZRZF.
25*
26* Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
27*
28* Notes
29* =====
30*
31* Each global data object is described by an associated description
32* vector. This vector stores the information required to establish
33* the mapping between an object element and its corresponding process
34* and memory location.
35*
36* Let A be a generic term for any 2D block cyclicly distributed array.
37* Such a global array has an associated description vector DESCA.
38* In the following comments, the character _ should be read as
39* "of the global array".
40*
41* NOTATION STORED IN EXPLANATION
42* --------------- -------------- --------------------------------------
43* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
44* DTYPE_A = 1.
45* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
46* the BLACS process grid A is distribu-
47* ted over. The context itself is glo-
48* bal, but the handle (the integer
49* value) may vary.
50* M_A (global) DESCA( M_ ) The number of rows in the global
51* array A.
52* N_A (global) DESCA( N_ ) The number of columns in the global
53* array A.
54* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
55* the rows of the array.
56* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
57* the columns of the array.
58* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
59* row of the array A is distributed.
60* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
61* first column of the array A is
62* distributed.
63* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
64* array. LLD_A >= MAX(1,LOCr(M_A)).
65*
66* Let K be the number of rows or columns of a distributed matrix,
67* and assume that its process grid has dimension p x q.
68* LOCr( K ) denotes the number of elements of K that a process
69* would receive if K were distributed over the p processes of its
70* process column.
71* Similarly, LOCc( K ) denotes the number of elements of K that a
72* process would receive if K were distributed over the q processes of
73* its process row.
74* The values of LOCr() and LOCc() may be determined via a call to the
75* ScaLAPACK tool function, NUMROC:
76* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
77* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
78* An upper bound for these quantities may be computed by:
79* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
80* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
81*
82* Arguments
83* =========
84*
85* SIDE (global input) CHARACTER
86* = 'L': apply Q or Q**T from the Left;
87* = 'R': apply Q or Q**T from the Right.
88*
89* TRANS (global input) CHARACTER
90* = 'N': No transpose, apply Q;
91* = 'T': Transpose, apply Q**T.
92*
93* DIRECT (global input) CHARACTER
94* Indicates how H is formed from a product of elementary
95* reflectors
96* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
97* = 'B': H = H(k) . . . H(2) H(1) (Backward)
98*
99* STOREV (global input) CHARACTER
100* Indicates how the vectors which define the elementary
101* reflectors are stored:
102* = 'C': Columnwise (not supported yet)
103* = 'R': Rowwise
104*
105* M (global input) INTEGER
106* The number of rows to be operated on i.e the number of rows
107* of the distributed submatrix sub( C ). M >= 0.
108*
109* N (global input) INTEGER
110* The number of columns to be operated on i.e the number of
111* columns of the distributed submatrix sub( C ). N >= 0.
112*
113* K (global input) INTEGER
114* The order of the matrix T (= the number of elementary
115* reflectors whose product defines the block reflector).
116*
117* L (global input) INTEGER
118* The columns of the distributed submatrix sub( A ) containing
119* the meaningful part of the Householder reflectors.
120* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
121*
122* V (local input) DOUBLE PRECISION pointer into the local memory
123* to an array of dimension (LLD_V, LOCc(JV+M-1)) if SIDE = 'L',
124* (LLD_V, LOCc(JV+N-1)) if SIDE = 'R'. It contains the local
125* pieces of the distributed vectors V representing the
126* Householder transformation as returned by PDTZRZF.
127* LLD_V >= LOCr(IV+K-1).
128*
129* IV (global input) INTEGER
130* The row index in the global array V indicating the first
131* row of sub( V ).
132*
133* JV (global input) INTEGER
134* The column index in the global array V indicating the
135* first column of sub( V ).
136*
137* DESCV (global and local input) INTEGER array of dimension DLEN_.
138* The array descriptor for the distributed matrix V.
139*
140* T (local input) DOUBLE PRECISION array, dimension MB_V by MB_V
141* The lower triangular matrix T in the representation of the
142* block reflector.
143*
144* C (local input/local output) DOUBLE PRECISION pointer into the
145* local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
146* On entry, the M-by-N distributed matrix sub( C ). On exit,
147* sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
148* sub( C )*Q or sub( C )*Q'.
149*
150* IC (global input) INTEGER
151* The row index in the global array C indicating the first
152* row of sub( C ).
153*
154* JC (global input) INTEGER
155* The column index in the global array C indicating the
156* first column of sub( C ).
157*
158* DESCC (global and local input) INTEGER array of dimension DLEN_.
159* The array descriptor for the distributed matrix C.
160*
161* WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
162* If STOREV = 'C',
163* if SIDE = 'L',
164* LWORK >= ( NqC0 + MpC0 ) * K
165* else if SIDE = 'R',
166* LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
167* NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
168* MpC0 ) ) * K
169* end if
170* else if STOREV = 'R',
171* if SIDE = 'L',
172* LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
173* MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
174* NqC0 ) ) * K
175* else if SIDE = 'R',
176* LWORK >= ( MpC0 + NqC0 ) * K
177* end if
178* end if
179*
180* where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
181*
182* IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
183* IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
184* IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
185* MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
186* NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
187*
188* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
189* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
190* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
191* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
192* NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
193* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
194*
195* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
196* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
197* the subroutine BLACS_GRIDINFO.
198*
199* Alignment requirements
200* ======================
201*
202* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
203* must verify some alignment properties, namely the following
204* expressions should be true:
205*
206* If STOREV = 'Columnwise'
207* If SIDE = 'Left',
208* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
209* If SIDE = 'Right',
210* ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
211* else if STOREV = 'Rowwise'
212* If SIDE = 'Left',
213* ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
214* If SIDE = 'Right',
215* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
216* end if
217*
218* =====================================================================
219*
220* .. Parameters ..
221 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
222 $ lld_, mb_, m_, nb_, n_, rsrc_
223 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
224 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
225 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
226 DOUBLE PRECISION ONE, ZERO
227 parameter( one = 1.0d+0, zero = 0.0d+0 )
228* ..
229* .. Local Scalars ..
230 LOGICAL LEFT
231 CHARACTER COLBTOP, TRANST
232 INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
233 $ icrow1, icrow2, ictxt, iibeg, iic1, iic2,
234 $ iiend, iinxt, iiv, ileft, info, ioffc2, ioffv,
235 $ ipt, ipv, ipw, iroffc1, iroffc2, itop, ivcol,
236 $ ivrow, jjbeg, jjend, jjnxt, jjc1, jjc2, jjv,
237 $ ldc, ldv, lv, lw, mbc, mbv, mpc1, mpc2, mpc20,
238 $ mqv, mqv0, mycol, mydist, myrow, nbc, nbv,
239 $ npcol, nprow, nqc1, nqc2, nqcall, nqv
240* ..
241* .. External Subroutines ..
242 EXTERNAL blacs_abort, blacs_gridinfo, dgebr2d,
243 $ dgebs2d,dgemm, dgsum2d, dlamov,
244 $ dlaset, dtrbr2d, dtrbs2d, dtrmm,
245 $ infog2l, pbdmatadd, pbdtran, pb_topget, pxerbla
246* ..
247* .. Intrinsic Functions ..
248 INTRINSIC max, min, mod
249* ..
250* .. External Functions ..
251 LOGICAL LSAME
252 INTEGER ICEIL, NUMROC
253 EXTERNAL iceil, lsame, numroc
254* ..
255* .. Executable Statements ..
256*
257* Quick return if possible
258*
259 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
260 $ RETURN
261*
262* Get grid parameters
263*
264 ictxt = descc( ctxt_ )
265 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
266*
267* Check for currently supported options
268*
269 info = 0
270 IF( .NOT.lsame( direct, 'b' ) ) THEN
271 INFO = -3
272.NOT. ELSE IF( LSAME( STOREV, 'r' ) ) THEN
273 INFO = -4
274 END IF
275.NE. IF( INFO0 ) THEN
276 CALL PXERBLA( ICTXT, 'pdlarzb', -INFO )
277 CALL BLACS_ABORT( ICTXT, 1 )
278 RETURN
279 END IF
280*
281 LEFT = LSAME( SIDE, 'l' )
282 IF( LSAME( TRANS, 'n' ) ) THEN
283 TRANST = 't'
284 ELSE
285 TRANST = 'n'
286 END IF
287*
288 CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
289 $ IVROW, IVCOL )
290 MBV = DESCV( MB_ )
291 NBV = DESCV( NB_ )
292 ICOFFV = MOD( JV-1, NBV )
293 NQV = NUMROC( L+ICOFFV, NBV, MYCOL, IVCOL, NPCOL )
294.EQ. IF( MYCOLIVCOL )
295 $ NQV = NQV - ICOFFV
296 LDV = DESCV( LLD_ )
297 IIV = MIN( IIV, LDV )
298 JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL,
299 $ DESCV( CSRC_ ), NPCOL ) ) )
300 IOFFV = IIV + ( JJV-1 ) * LDV
301 MBC = DESCC( MB_ )
302 NBC = DESCC( NB_ )
303 NQCALL = NUMROC( DESCC( N_ ), NBC, MYCOL, DESCC( CSRC_ ), NPCOL )
304 CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC1,
305 $ JJC1, ICROW1, ICCOL1 )
306 LDC = DESCC( LLD_ )
307 IIC1 = MIN( IIC1, LDC )
308 JJC1 = MIN( JJC1, MAX( 1, NQCALL ) )
309*
310 IF( LEFT ) THEN
311 IROFFC1 = MOD( IC-1, MBC )
312 MPC1 = NUMROC( K+IROFFC1, MBC, MYROW, ICROW1, NPROW )
313.EQ. IF( MYROWICROW1 )
314 $ MPC1 = MPC1 - IROFFC1
315 ICOFFC1 = MOD( JC-1, NBC )
316 NQC1 = NUMROC( N+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL )
317.EQ. IF( MYCOLICCOL1 )
318 $ NQC1 = NQC1 - ICOFFC1
319 CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
320 $ IIC2, JJC2, ICROW2, ICCOL2 )
321 IROFFC2 = MOD( IC+M-L-1, MBC )
322 MPC2 = NUMROC( L+IROFFC2, MBC, MYROW, ICROW2, NPROW )
323.EQ. IF( MYROWICROW2 )
324 $ MPC2 = MPC2 - IROFFC2
325 ICOFFC2 = ICOFFC1
326 NQC2 = NQC1
327 ELSE
328 IROFFC1 = MOD( IC-1, MBC )
329 MPC1 = NUMROC( M+IROFFC1, MBC, MYROW, ICROW1, NPROW )
330.EQ. IF( MYROWICROW1 )
331 $ MPC1 = MPC1 - IROFFC1
332 ICOFFC1 = MOD( JC-1, NBC )
333 NQC1 = NUMROC( K+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL )
334.EQ. IF( MYCOLICCOL1 )
335 $ NQC1 = NQC1 - ICOFFC1
336 CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL,
337 $ IIC2, JJC2, ICROW2, ICCOL2 )
338 IROFFC2 = IROFFC1
339 MPC2 = MPC1
340 ICOFFC2 = MOD( JC+N-L-1, NBC )
341 NQC2 = NUMROC( L+ICOFFC2, NBC, MYCOL, ICCOL2, NPCOL )
342.EQ. IF( MYCOLICCOL2 )
343 $ NQC2 = NQC2 - ICOFFC2
344 END IF
345 IIC2 = MIN( IIC2, LDC )
346 JJC2 = MIN( JJC2, NQCALL )
347 IOFFC2 = IIC2 + ( JJC2-1 ) * LDC
348*
349 IF( LSAME( SIDE, 'l' ) ) THEN
350*
351* Form Q*sub( C ) or Q'*sub( C )
352*
353* IROFFC2 = ICOFFV is required by the current transposition
354* routine PBDTRAN
355*
356 MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL )
357.EQ. IF( MYCOLIVCOL ) THEN
358 MQV = MQV0 - ICOFFV
359 ELSE
360 MQV = MQV0
361 END IF
362.EQ. IF( MYROWICROW2 ) THEN
363 MPC20 = MPC2 + IROFFC2
364 ELSE
365 MPC20 = MPC2
366 END IF
367*
368* Locally V( IOFFV ) is K x MQV, C( IOFFC2 ) is MPC2 x NQC2
369* WORK( IPV ) is MPC20 x K = [ . V( IOFFV ) ]'
370* WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
371* WORK( IPT ) is the workspace for PBDTRAN
372*
373 IPV = 1
374 IPW = IPV + MPC20 * K
375 IPT = IPW + K * MQV0
376 LV = MAX( 1, MPC20 )
377 LW = MAX( 1, K )
378*
379.EQ. IF( MYROWIVROW ) THEN
380.EQ. IF( MYCOLIVCOL ) THEN
381 CALL DLAMOV( 'all', K, MQV, V( IOFFV ), LDV,
382 $ WORK( IPW+ICOFFV*LW ), LW )
383 ELSE
384 CALL DLAMOV( 'all', K, MQV, V( IOFFV ), LDV,
385 $ WORK( IPW ), LW )
386 END IF
387 END IF
388*
389* WORK( IPV ) = WORK( IPW )' (replicated) is MPC20 x K
390*
391 CALL PBDTRAN( ICTXT, 'rowwise', 'transpose', K, M+ICOFFV,
392 $ DESCV( NB_ ), WORK( IPW ), LW, ZERO,
393 $ WORK( IPV ), LV, IVROW, IVCOL, ICROW2, -1,
394 $ WORK( IPT ) )
395*
396* WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC2 x K
397*
398.EQ. IF( MYROWICROW2 )
399 $ IPV = IPV + IROFFC2
400*
401* WORK( IPW ) becomes NQC2 x K = C( IOFFC2 )' * V'
402* WORK( IPW ) = C( IOFFC2 )' * V' (NQC2 x MPC2 x K) -> NQC2 x K
403*
404 LW = MAX( 1, NQC2 )
405*
406.GT. IF( MPC20 ) THEN
407 CALL DGEMM( 'transpose', 'no transpose', NQC2, K, MPC2,
408 $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO,
409 $ WORK( IPW ), LW )
410 ELSE
411 CALL DLASET( 'all', NQC2, K, ZERO, ZERO, WORK( IPW ), LW )
412 END IF
413*
414* WORK( IPW ) = WORK( IPW ) + C1 ( NQC1 = NQC2 )
415*
416.GT. IF( MPC10 ) THEN
417 MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW )
418 ITOP = MAX( 0, MYDIST * MBC - IROFFC1 )
419 IIBEG = IIC1
420 IIEND = IIC1 + MPC1 - 1
421 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND )
422*
423 10 CONTINUE
424.LE. IF( IIBEGIINXT ) THEN
425 CALL PBDMATADD( ICTXT, 'transpose', NQC2, IINXT-IIBEG+1,
426 $ ONE, C( IIBEG+(JJC1-1)*LDC ), LDC, ONE,
427 $ WORK( IPW+ITOP ), LW )
428 MYDIST = MYDIST + NPROW
429 ITOP = MYDIST * MBC - IROFFC1
430 IIBEG = IINXT +1
431 IINXT = MIN( IINXT+MBC, IIEND )
432 GO TO 10
433 END IF
434 END IF
435*
436 CALL DGSUM2D( ICTXT, 'columnwise', ' ', NQC2, K, WORK( IPW ),
437 $ LW, IVROW, MYCOL )
438*
439* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
440*
441.EQ. IF( MYROWIVROW ) THEN
442.EQ. IF( MYCOLIVCOL ) THEN
443*
444* Broadcast the block reflector to the other columns.
445*
446 CALL DTRBS2D( ICTXT, 'rowwise', ' ', 'lower', 'non unit',
447 $ K, K, T, MBV )
448 ELSE
449 CALL DTRBR2D( ICTXT, 'rowwise', ' ', 'lower', 'non unit',
450 $ K, K, T, MBV, MYROW, IVCOL )
451 END IF
452 CALL DTRMM( 'right', 'lower', TRANST, 'non unit', NQC2, K,
453 $ ONE, T, MBV, WORK( IPW ), LW )
454*
455 CALL DGEBS2D( ICTXT, 'columnwise', ' ', NQC2, K,
456 $ WORK( IPW ), LW )
457 ELSE
458 CALL DGEBR2D( ICTXT, 'columnwise', ' ', NQC2, K,
459 $ WORK( IPW ), LW, IVROW, MYCOL )
460 END IF
461*
462* C1 = C1 - WORK( IPW )
463*
464.GT. IF( MPC10 ) THEN
465 MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW )
466 ITOP = MAX( 0, MYDIST * MBC - IROFFC1 )
467 IIBEG = IIC1
468 IIEND = IIC1 + MPC1 - 1
469 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND )
470*
471 20 CONTINUE
472.LE. IF( IIBEGIINXT ) THEN
473 CALL PBDMATADD( ICTXT, 'transpose', IINXT-IIBEG+1, NQC2,
474 $ -ONE, WORK( IPW+ITOP ), LW, ONE,
475 $ C( IIBEG+(JJC1-1)*LDC ), LDC )
476 MYDIST = MYDIST + NPROW
477 ITOP = MYDIST * MBC - IROFFC1
478 IIBEG = IINXT +1
479 IINXT = MIN( IINXT+MBC, IIEND )
480 GO TO 20
481 END IF
482 END IF
483*
484* C2 C2 - V' * W'
485* C( IOFFC2 ) = C( IOFFC2 ) - WORK( IPV ) * WORK( IPW )'
486* MPC2 x NQC2 MPC2 x K K x NQC2
487*
488 CALL DGEMM( 'no transpose', 'transpose', MPC2, NQC2, K, -ONE,
489 $ WORK( IPV ), LV, WORK( IPW ), LW, ONE,
490 $ C( IOFFC2 ), LDC )
491*
492 ELSE
493*
494* Form sub( C ) * Q or sub( C ) * Q'
495*
496* Locally V( IOFFV ) is K x NQV, C( IOFFC2 ) is MPC2 x NQC2
497* WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC2
498* WORK( IPW ) is MPC2 x K = C( IOFFC2 ) * V( IOFFV )'
499*
500 IPV = 1
501 IPW = IPV + K * NQC2
502 LV = MAX( 1, K )
503 LW = MAX( 1, MPC2 )
504*
505* Broadcast V to the other process rows.
506*
507 CALL PB_TOPGET( ICTXT, 'broadcast', 'columnwise', COLBTOP )
508.EQ. IF( MYROWIVROW ) THEN
509 CALL DGEBS2D( ICTXT, 'columnwise', COLBTOP, K, NQC2,
510 $ V( IOFFV ), LDV )
511.EQ. IF( MYCOLIVCOL )
512 $ CALL DTRBS2D( ICTXT, 'columnwise', COLBTOP, 'lower',
513 $ 'non unit', K, K, T, MBV )
514 CALL DLAMOV( 'all', K, NQC2, V( IOFFV ), LDV, WORK( IPV ),
515 $ LV )
516 ELSE
517 CALL DGEBR2D( ICTXT, 'columnwise', COLBTOP, K, NQC2,
518 $ WORK( IPV ), LV, IVROW, MYCOL )
519.EQ. IF( MYCOLIVCOL )
520 $ CALL DTRBR2D( ICTXT, 'columnwise', COLBTOP, 'lower',
521 $ 'non unit', K, K, T, MBV, IVROW, MYCOL )
522 END IF
523*
524* WORK( IPV ) is K x NQC2 = V = V( IOFFV )
525* WORK( IPW ) = C( IOFFC2 ) * V' (MPC2 x NQC2 x K) -> MPC2 x K
526*
527.GT. IF( NQC20 ) THEN
528 CALL DGEMM( 'no transpose', 'Transpose', mpc2, k, nqc2,
529 $ one, c( ioffc2 ), ldc, work( ipv ), lv, zero,
530 $ work( ipw ), lw )
531 ELSE
532 CALL dlaset( 'All', mpc2, k, zero, zero, work( ipw ), lw )
533 END IF
534*
535* WORK( IPW ) = WORK( IPW ) + C1 ( MPC1 = MPC2 )
536*
537 IF( nqc1.GT.0 ) THEN
538 mydist = mod( mycol-iccol1+npcol, npcol )
539 ileft = max( 0, mydist * nbc - icoffc1 )
540 jjbeg = jjc1
541 jjend = jjc1 + nqc1 - 1
542 jjnxt = min( iceil( jjbeg, nbc ) * nbc, jjend )
543*
544 30 CONTINUE
545 IF( jjbeg.LE.jjnxt ) THEN
546 CALL pbdmatadd( ictxt, 'No transpose', mpc2,
547 $ jjnxt-jjbeg+1, one,
548 $ c( iic1+(jjbeg-1)*ldc ), ldc, one,
549 $ work( ipw+ileft*lw ), lw )
550 mydist = mydist + npcol
551 ileft = mydist * nbc - icoffc1
552 jjbeg = jjnxt +1
553 jjnxt = min( jjnxt+nbc, jjend )
554 GO TO 30
555 END IF
556 END IF
557*
558 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
559 $ lw, myrow, ivcol )
560*
561* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
562*
563 IF( mycol.EQ.ivcol ) THEN
564 CALL dtrmm( 'Right', 'lower', TRANS, 'non unit', MPC2, K,
565 $ ONE, T, MBV, WORK( IPW ), LW )
566 CALL DGEBS2D( ICTXT, 'rowwise', ' ', MPC2, K, WORK( IPW ),
567 $ LW )
568 ELSE
569 CALL DGEBR2D( ICTXT, 'rowwise', ' ', MPC2, K, WORK( IPW ),
570 $ LW, MYROW, IVCOL )
571 END IF
572*
573* C1 = C1 - WORK( IPW )
574*
575.GT. IF( NQC10 ) THEN
576 MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL )
577 ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 )
578 JJBEG = JJC1
579 JJEND = JJC1 + NQC1 - 1
580 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND )
581*
582 40 CONTINUE
583.LE. IF( JJBEGJJNXT ) THEN
584 CALL PBDMATADD( ICTXT, 'no transpose', MPC2,
585 $ JJNXT-JJBEG+1, -ONE,
586 $ WORK( IPW+ILEFT*LW ), LW, ONE,
587 $ C( IIC1+(JJBEG-1)*LDC ), LDC )
588 MYDIST = MYDIST + NPCOL
589 ILEFT = MYDIST * NBC - ICOFFC1
590 JJBEG = JJNXT +1
591 JJNXT = MIN( JJNXT+NBC, JJEND )
592 GO TO 40
593 END IF
594 END IF
595*
596* C2 C2 - W * V
597* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
598* MPC2 x NQC2 MPC2 x K K x NQC2
599*
600.GT. IF( IOFFC20 )
601 $ CALL DGEMM( 'no transpose', 'no transpose', MPC2, NQC2, K,
602 $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
603 $ C( IOFFC2 ), LDC )
604*
605 END IF
606*
607 RETURN
608*
609* End of PDLARZB
610*
611 END
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187
subroutine dtrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
DTRMM
Definition dtrmm.f:177
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine dgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1082
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine dgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1123
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
subroutine pbdmatadd(icontxt, mode, m, n, alpha, a, lda, beta, b, ldb)
Definition pbdmatadd.f:3
subroutine pbdtran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
Definition pbdtran.f:3
subroutine pdlarzb(side, trans, direct, storev, m, n, k, l, v, iv, jv, descv, t, c, ic, jc, descc, work)
Definition pdlarzb.f:3