OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pclarfb.f
Go to the documentation of this file.
1 SUBROUTINE pclarfb( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV,
2 $ JV, DESCV, T, C, IC, JC, DESCC, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER SIDE, TRANS, DIRECT, STOREV
10 INTEGER IC, IV, JC, JV, K, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCC( * ), DESCV( * )
14 COMPLEX C( * ), T( * ), V( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PCLARFB applies a complex block reflector Q or its conjugate
21* transpose Q**H to a complex M-by-N distributed matrix sub( C )
22* denoting C(IC:IC+M-1,JC:JC+N-1), from the left or the right.
23*
24* Notes
25* =====
26*
27* Each global data object is described by an associated description
28* vector. This vector stores the information required to establish
29* the mapping between an object element and its corresponding process
30* and memory location.
31*
32* Let A be a generic term for any 2D block cyclicly distributed array.
33* Such a global array has an associated description vector DESCA.
34* In the following comments, the character _ should be read as
35* "of the global array".
36*
37* NOTATION STORED IN EXPLANATION
38* --------------- -------------- --------------------------------------
39* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40* DTYPE_A = 1.
41* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42* the BLACS process grid A is distribu-
43* ted over. The context itself is glo-
44* bal, but the handle (the integer
45* value) may vary.
46* M_A (global) DESCA( M_ ) The number of rows in the global
47* array A.
48* N_A (global) DESCA( N_ ) The number of columns in the global
49* array A.
50* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51* the rows of the array.
52* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53* the columns of the array.
54* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55* row of the array A is distributed.
56* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57* first column of the array A is
58* distributed.
59* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60* array. LLD_A >= MAX(1,LOCr(M_A)).
61*
62* Let K be the number of rows or columns of a distributed matrix,
63* and assume that its process grid has dimension p x q.
64* LOCr( K ) denotes the number of elements of K that a process
65* would receive if K were distributed over the p processes of its
66* process column.
67* Similarly, LOCc( K ) denotes the number of elements of K that a
68* process would receive if K were distributed over the q processes of
69* its process row.
70* The values of LOCr() and LOCc() may be determined via a call to the
71* ScaLAPACK tool function, NUMROC:
72* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74* An upper bound for these quantities may be computed by:
75* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77*
78* Arguments
79* =========
80*
81* SIDE (global input) CHARACTER
82* = 'L': apply Q or Q**H from the Left;
83* = 'R': apply Q or Q**H from the Right.
84*
85* TRANS (global input) CHARACTER
86* = 'N': No transpose, apply Q;
87* = 'C': Conjugate transpose, apply Q**H.
88*
89* DIRECT (global input) CHARACTER
90* Indicates how Q is formed from a product of elementary
91* reflectors
92* = 'F': Q = H(1) H(2) . . . H(k) (Forward)
93* = 'B': Q = H(k) . . . H(2) H(1) (Backward)
94*
95* STOREV (global input) CHARACTER
96* Indicates how the vectors which define the elementary
97* reflectors are stored:
98* = 'C': Columnwise
99* = 'R': Rowwise
100*
101* M (global input) INTEGER
102* The number of rows to be operated on i.e the number of rows
103* of the distributed submatrix sub( C ). M >= 0.
104*
105* N (global input) INTEGER
106* The number of columns to be operated on i.e the number of
107* columns of the distributed submatrix sub( C ). N >= 0.
108*
109* K (global input) INTEGER
110* The order of the matrix T (= the number of elementary
111* reflectors whose product defines the block reflector).
112*
113* V (local input) COMPLEX pointer into the local memory
114* to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if
115* STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and
116* SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and
117* SIDE = 'R'. It contains the local pieces of the distributed
118* vectors V representing the Householder transformation.
119* See further details.
120* If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1));
121* if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1));
122* if STOREV = 'R', LLD_V >= LOCr(IV+K-1).
123*
124* IV (global input) INTEGER
125* The row index in the global array V indicating the first
126* row of sub( V ).
127*
128* JV (global input) INTEGER
129* The column index in the global array V indicating the
130* first column of sub( V ).
131*
132* DESCV (global and local input) INTEGER array of dimension DLEN_.
133* The array descriptor for the distributed matrix V.
134*
135* T (local input) COMPLEX array, dimension MB_V by MB_V
136* if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian-
137* gular matrix T in the representation of the block reflector.
138*
139* C (local input/local output) COMPLEX pointer into the
140* local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
141* On entry, the M-by-N distributed matrix sub( C ). On exit,
142* sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
143* sub( C )*Q or sub( C )*Q'.
144*
145* IC (global input) INTEGER
146* The row index in the global array C indicating the first
147* row of sub( C ).
148*
149* JC (global input) INTEGER
150* The column index in the global array C indicating the
151* first column of sub( C ).
152*
153* DESCC (global and local input) INTEGER array of dimension DLEN_.
154* The array descriptor for the distributed matrix C.
155*
156* WORK (local workspace) COMPLEX array, dimension (LWORK)
157* If STOREV = 'C',
158* if SIDE = 'L',
159* LWORK >= ( NqC0 + MpC0 ) * K
160* else if SIDE = 'R',
161* LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
162* NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
163* MpC0 ) ) * K
164* end if
165* else if STOREV = 'R',
166* if SIDE = 'L',
167* LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
168* MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
169* NqC0 ) ) * K
170* else if SIDE = 'R',
171* LWORK >= ( MpC0 + NqC0 ) * K
172* end if
173* end if
174*
175* where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
176*
177* IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
178* IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
179* IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
180* MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
181* NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
182*
183* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
184* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
185* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
186* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
187* NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
188* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
189*
190* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
191* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
192* the subroutine BLACS_GRIDINFO.
193*
194* Alignment requirements
195* ======================
196*
197* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
198* must verify some alignment properties, namely the following
199* expressions should be true:
200*
201* If STOREV = 'Columnwise'
202* If SIDE = 'Left',
203* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
204* If SIDE = 'Right',
205* ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
206* else if STOREV = 'Rowwise'
207* If SIDE = 'Left',
208* ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
209* If SIDE = 'Right',
210* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
211* end if
212*
213* =====================================================================
214*
215* .. Parameters ..
216 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217 $ lld_, mb_, m_, nb_, n_, rsrc_
218 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
221 COMPLEX ONE, ZERO
222 parameter( one = ( 1.0e+0, 0.0e+0 ),
223 $ zero = ( 0.0e+0, 0.0e+0 ) )
224* ..
225* .. Local Scalars ..
226 LOGICAL FORWARD
227 CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO
228 INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
229 $ ictxt, ii, iibeg, iic, iiend, iinxt, iiv,
230 $ ilastcol, ilastrow, ileft, ioff, ioffc, ioffv,
231 $ ipt, ipv, ipw, ipw1, iright, iroffc, iroffv,
232 $ itop, ivcol, ivrow, jj, jjbeg, jjc, jjend,
233 $ jjnxt, jjv, kp, kq, ldc, ldv, lv, lw, mbv, mpc,
234 $ mpc0, mqv, mqv0, mycol, mydist, myrow, nbv,
235 $ npv, npv0, npcol, nprow, nqc, nqc0, wide
236* ..
237* .. External Subroutines ..
239 $ cgsum2d, clamov, claset, ctrbr2d,
240 $ ctrbs2d, ctrmm, infog1l, infog2l, pb_topget,
241 $ pbctran
242* ..
243* .. Intrinsic Functions ..
244 INTRINSIC max, min, mod
245* ..
246* .. External Functions ..
247 LOGICAL LSAME
248 INTEGER ICEIL, NUMROC
249 EXTERNAL iceil, lsame, numroc
250* ..
251* .. Executable Statements ..
252*
253* Quick return if possible
254*
255 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
256 $ RETURN
257*
258* Get grid parameters
259*
260 ictxt = descc( ctxt_ )
261 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
262*
263 IF( lsame( trans, 'N' ) ) THEN
264 transt = 'c'
265 ELSE
266 TRANST = 'n'
267 END IF
268 FORWARD = LSAME( DIRECT, 'f' )
269 IF( FORWARD ) THEN
270 UPLO = 'u'
271 ELSE
272 UPLO = 'l'
273 END IF
274*
275 CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
276 $ IVROW, IVCOL )
277 CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC,
278 $ ICROW, ICCOL )
279 LDC = DESCC( LLD_ )
280 LDV = DESCV( LLD_ )
281 IIC = MIN( IIC, LDC )
282 IIV = MIN( IIV, LDV )
283 IROFFC = MOD( IC-1, DESCC( MB_ ) )
284 ICOFFC = MOD( JC-1, DESCC( NB_ ) )
285 MBV = DESCV( MB_ )
286 NBV = DESCV( NB_ )
287 IROFFV = MOD( IV-1, MBV )
288 ICOFFV = MOD( JV-1, NBV )
289 MPC = NUMROC( M+IROFFC, DESCC( MB_ ), MYROW, ICROW, NPROW )
290 NQC = NUMROC( N+ICOFFC, DESCC( NB_ ), MYCOL, ICCOL, NPCOL )
291.EQ. IF( MYCOLICCOL )
292 $ NQC = NQC - ICOFFC
293.EQ. IF( MYROWICROW )
294 $ MPC = MPC - IROFFC
295 JJC = MIN( JJC, MAX( 1, JJC+NQC-1 ) )
296 JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL,
297 $ DESCV( CSRC_ ), NPCOL ) ) )
298 IOFFC = IIC + ( JJC-1 ) * LDC
299 IOFFV = IIV + ( JJV-1 ) * LDV
300*
301 IF( LSAME( STOREV, 'c' ) ) THEN
302*
303* V is stored columnwise
304*
305 IF( LSAME( SIDE, 'l' ) ) THEN
306*
307* Form Q*sub( C ) or Q'*sub( C )
308*
309* Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC
310* WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV
311* WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV )
312*
313 IPV = 1
314 IPW = IPV + MPC * K
315 LV = MAX( 1, MPC )
316 LW = MAX( 1, NQC )
317*
318* Broadcast V to the other process columns.
319*
320 CALL PB_TOPGET( ICTXT, 'broadcast', 'rowwise', ROWBTOP )
321.EQ. IF( MYCOLIVCOL ) THEN
322 CALL CGEBS2D( ICTXT, 'rowwise', ROWBTOP, MPC, K,
323 $ V( IOFFV ), LDV )
324.EQ. IF( MYROWIVROW )
325 $ CALL CTRBS2D( ICTXT, 'rowwise', ROWBTOP, UPLO,
326 $ 'non unit', K, K, T, NBV )
327 CALL CLAMOV( 'all', MPC, K, V( IOFFV ), LDV, WORK( IPV ),
328 $ LV )
329 ELSE
330 CALL CGEBR2D( ICTXT, 'rowwise', ROWBTOP, MPC, K,
331 $ WORK( IPV ), LV, MYROW, IVCOL )
332.EQ. IF( MYROWIVROW )
333 $ CALL CTRBR2D( ICTXT, 'rowwise', ROWBTOP, UPLO,
334 $ 'non unit', K, K, T, NBV, MYROW, IVCOL )
335 END IF
336*
337 IF( FORWARD ) THEN
338*
339* WORK(IPV) = ( V1 ) where V1 is unit lower triangular,
340* ( V2 ) zeroes upper triangular part of V1
341*
342 MYDIST = MOD( MYROW-IVROW+NPROW, NPROW )
343 ITOP = MAX( 0, MYDIST*MBV - IROFFV )
344 IIBEG = IIV
345 IIEND = IIBEG + MPC - 1
346 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND )
347*
348 10 CONTINUE
349.GT. IF( K-ITOP 0 ) THEN
350 CALL CLASET( 'upper', IINXT-IIBEG+1, K-ITOP, ZERO,
351 $ ONE, WORK( IPV+IIBEG-IIV+ITOP*LV ), LV )
352 MYDIST = MYDIST + NPROW
353 ITOP = MYDIST * MBV - IROFFV
354 IIBEG = IINXT + 1
355 IINXT = MIN( IINXT+MBV, IIEND )
356 GO TO 10
357 END IF
358*
359 ELSE
360*
361* WORK(IPV) = ( V1 ) where V2 is unit upper triangular,
362* ( V2 ) zeroes lower triangular part of V2
363*
364 JJ = JJV
365 IOFF = MOD( IV+M-K-1, MBV )
366 CALL INFOG1L( IV+M-K, MBV, NPROW, MYROW, DESCV( RSRC_ ),
367 $ II, ILASTROW )
368 KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW )
369.EQ. IF( MYROWILASTROW )
370 $ KP = KP - IOFF
371 MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW )
372 ITOP = MYDIST * MBV - IOFF
373 IBASE = MIN( ITOP+MBV, K )
374 ITOP = MIN( MAX( 0, ITOP ), K )
375*
376 20 CONTINUE
377.LE. IF( JJ( JJV+K-1 ) ) THEN
378 HEIGHT = IBASE - ITOP
379 CALL CLASET( 'all', KP, ITOP-JJ+JJV, ZERO, ZERO,
380 $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV )
381 CALL CLASET( 'lower', KP, HEIGHT, ZERO, ONE,
382 $ WORK( IPV+II-IIV+ITOP*LV ), LV )
383 KP = MAX( 0, KP - HEIGHT )
384 II = II + HEIGHT
385 JJ = JJV + IBASE
386 MYDIST = MYDIST + NPROW
387 ITOP = MYDIST * MBV - IOFF
388 IBASE = MIN( ITOP + MBV, K )
389 ITOP = MIN( ITOP, K )
390 GO TO 20
391 END IF
392*
393 END IF
394*
395* WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K
396*
397.GT. IF( MPC0 ) THEN
398 CALL CGEMM( 'conjugate transpose', 'no transpose', NQC,
399 $ K, MPC, ONE, C( IOFFC ), LDC, WORK( IPV ), LV,
400 $ ZERO, WORK( IPW ), LW )
401 ELSE
402 CALL CLASET( 'all', NQC, K, ZERO, ZERO, WORK( IPW ), LW )
403 END IF
404*
405 CALL CGSUM2D( ICTXT, 'columnwise', ' ', NQC, K, WORK( IPW ),
406 $ LW, IVROW, MYCOL )
407*
408.EQ. IF( MYROWIVROW ) THEN
409*
410* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
411*
412 CALL CTRMM( 'right', UPLO, TRANST, 'non unit', NQC, K,
413 $ ONE, T, NBV, WORK( IPW ), LW )
414 CALL CGEBS2D( ICTXT, 'columnwise', ' ', NQC, K,
415 $ WORK( IPW ), LW )
416 ELSE
417 CALL CGEBR2D( ICTXT, 'columnwise', ' ', nqc, k,
418 $ work( ipw ), lw, ivrow, mycol )
419 END IF
420*
421* C C - V * W'
422* C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
423* MPC x NQC MPC x K K x NQC
424*
425 CALL cgemm( 'No transpose', 'Conjugate transpose', mpc, nqc,
426 $ k, -one, work( ipv ), lv, work( ipw ), lw, one,
427 $ c( ioffc ), ldc )
428*
429 ELSE
430*
431* Form sub( C )*Q or sub( C )*Q'
432*
433* ICOFFC = IROFFV is required by the current transposition
434* routine PBCTRAN
435*
436 npv0 = numroc( n+iroffv, mbv, myrow, ivrow, nprow )
437 IF( myrow.EQ.ivrow ) THEN
438 npv = npv0 - iroffv
439 ELSE
440 npv = npv0
441 END IF
442 IF( mycol.EQ.iccol ) THEN
443 nqc0 = nqc + icoffc
444 ELSE
445 nqc0 = nqc
446 END IF
447*
448* Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC
449* WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]'
450* WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]'
451* WORK( IPT ) is the workspace for PBCTRAN
452*
453 ipv = 1
454 ipw = ipv + k * nqc0
455 ipt = ipw + npv0 * k
456 lv = max( 1, k )
457 lw = max( 1, npv0 )
458*
459 IF( mycol.EQ.ivcol ) THEN
460 IF( myrow.EQ.ivrow ) THEN
461 CALL claset( 'All', iroffv, k, zero, zero,
462 $ work( ipw ), lw )
463 ipw1 = ipw + iroffv
464 CALL clamov( 'All', npv, k, v( ioffv ), ldv,
465 $ work( ipw1 ), lw )
466 ELSE
467 ipw1 = ipw
468 CALL clamov( 'All', npv, k, v( ioffv ), ldv,
469 $ work( ipw1 ), lw )
470 END IF
471*
472 IF( forward ) THEN
473*
474* WORK(IPW) = ( . V1' V2' )' where V1 is unit lower
475* triangular, zeroes upper triangular part of V1
476*
477 mydist = mod( myrow-ivrow+nprow, nprow )
478 itop = max( 0, mydist*mbv - iroffv )
479 iibeg = iiv
480 iiend = iibeg + npv - 1
481 iinxt = min( iceil( iibeg, mbv )*mbv, iiend )
482*
483 30 CONTINUE
484 IF( ( k-itop ).GT.0 ) THEN
485 CALL claset( 'Upper', iinxt-iibeg+1, k-itop, zero,
486 $ one, work( ipw1+iibeg-iiv+itop*lw ),
487 $ lw )
488 mydist = mydist + nprow
489 itop = mydist * mbv - iroffv
490 iibeg = iinxt + 1
491 iinxt = min( iinxt+mbv, iiend )
492 GO TO 30
493 END IF
494*
495 ELSE
496*
497* WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper
498* triangular, zeroes lower triangular part of V2.
499*
500 jj = jjv
501 CALL infog1l( iv+n-k, mbv, nprow, myrow,
502 $ descv( rsrc_ ), ii, ilastrow )
503 ioff = mod( iv+n-k-1, mbv )
504 kp = numroc( k+ioff, mbv, myrow, ilastrow, nprow )
505 IF( myrow.EQ.ilastrow )
506 $ kp = kp - ioff
507 mydist = mod( myrow-ilastrow+nprow, nprow )
508 itop = mydist * mbv - ioff
509 ibase = min( itop+mbv, k )
510 itop = min( max( 0, itop ), k )
511*
512 40 CONTINUE
513 IF( jj.LE.( jjv+k-1 ) ) THEN
514 height = ibase - itop
515 CALL claset( 'All', kp, itop-jj+jjv, zero, zero,
516 $ work( ipw1+ii-iiv+(jj-jjv)*lw ), lw )
517 CALL claset( 'Lower', kp, height, zero, one,
518 $ work( ipw1+ii-iiv+itop*lw ), lw )
519 kp = max( 0, kp - height )
520 ii = ii + height
521 jj = jjv + ibase
522 mydist = mydist + nprow
523 itop = mydist * mbv - ioff
524 ibase = min( itop + mbv, k )
525 itop = min( itop, k )
526 GO TO 40
527 END IF
528 END IF
529 END IF
530*
531 CALL pbctran( ictxt, 'Columnwise', 'Conjugate transpose',
532 $ n+iroffv, k, mbv, work( ipw ), lw, zero,
533 $ work( ipv ), lv, ivrow, ivcol, -1, iccol,
534 $ work( ipt ) )
535*
536* WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC
537*
538 IF( mycol.EQ.iccol )
539 $ ipv = ipv + icoffc * lv
540*
541* WORK( IPW ) becomes MPC x K = C( IOFFC ) * V
542* WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K
543*
544 lw = max( 1, mpc )
545*
546 IF( nqc.GT.0 ) THEN
547 CALL cgemm( 'No transpose', 'Conjugate transpose', mpc,
548 $ k, nqc, one, c( ioffc ), ldc, work( ipv ),
549 $ lv, zero, work( ipw ), lw )
550 ELSE
551 CALL claset( 'All', mpc, k, zero, zero, work( ipw ), lw )
552 END IF
553*
554 CALL cgsum2d( ictxt, 'Rowwise', ' ', mpc, k, work( ipw ),
555 $ lw, myrow, ivcol )
556*
557* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
558*
559 IF( mycol.EQ.ivcol ) THEN
560 IF( myrow.EQ.ivrow ) THEN
561*
562* Broadcast the block reflector to the other rows.
563*
564 CALL ctrbs2d( ictxt, 'Columnwise', ' ', uplo,
565 $ 'Non unit', k, k, t, nbv )
566 ELSE
567 CALL ctrbr2d( ictxt, 'Columnwise', ' ', uplo,
568 $ 'Non unit', k, k, t, nbv, ivrow, mycol )
569 END IF
570 CALL ctrmm( 'right', UPLO, TRANS, 'non unit', MPC, K,
571 $ ONE, T, NBV, WORK( IPW ), LW )
572*
573 CALL CGEBS2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
574 $ LW )
575 ELSE
576 CALL CGEBR2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
577 $ LW, MYROW, IVCOL )
578 END IF
579*
580* C C - W * V'
581* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
582* MPC x NQC MPC x K K x NQC
583*
584 CALL CGEMM( 'no transpose', 'no transpose', MPC, NQC, K,
585 $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
586 $ C( IOFFC ), LDC )
587 END IF
588*
589 ELSE
590*
591* V is stored rowwise
592*
593 IF( LSAME( SIDE, 'l' ) ) THEN
594*
595* Form Q*sub( C ) or Q'*sub( C )
596*
597* IROFFC = ICOFFV is required by the current transposition
598* routine PBCTRAN
599*
600 MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL )
601.EQ. IF( MYCOLIVCOL ) THEN
602 MQV = MQV0 - ICOFFV
603 ELSE
604 MQV = MQV0
605 END IF
606.EQ. IF( MYROWICROW ) THEN
607 MPC0 = MPC + IROFFC
608 ELSE
609 MPC0 = MPC
610 END IF
611*
612* Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC
613* WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]'
614* WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
615* WORK( IPT ) is the workspace for PBCTRAN
616*
617 IPV = 1
618 IPW = IPV + MPC0 * K
619 IPT = IPW + K * MQV0
620 LV = MAX( 1, MPC0 )
621 LW = MAX( 1, K )
622*
623.EQ. IF( MYROWIVROW ) THEN
624.EQ. IF( MYCOLIVCOL ) THEN
625 CALL CLASET( 'all', K, ICOFFV, ZERO, ZERO,
626 $ WORK( IPW ), LW )
627 IPW1 = IPW + ICOFFV * LW
628 CALL CLAMOV( 'all', K, MQV, V( IOFFV ), LDV,
629 $ WORK( IPW1 ), LW )
630 ELSE
631 IPW1 = IPW
632 CALL CLAMOV( 'all', K, MQV, V( IOFFV ), LDV,
633 $ WORK( IPW1 ), LW )
634 END IF
635*
636 IF( FORWARD ) THEN
637*
638* WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper
639* triangular, zeroes lower triangular part of V1
640*
641 MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL )
642 ILEFT = MAX( 0, MYDIST * NBV - ICOFFV )
643 JJBEG = JJV
644 JJEND = JJV + MQV - 1
645 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND )
646*
647 50 CONTINUE
648.GT. IF( ( K-ILEFT )0 ) THEN
649 CALL CLASET( 'lower', K-ILEFT, JJNXT-JJBEG+1, ZERO,
650 $ ONE,
651 $ WORK( IPW1+ILEFT+(JJBEG-JJV)*LW ),
652 $ LW )
653 MYDIST = MYDIST + NPCOL
654 ILEFT = MYDIST * NBV - ICOFFV
655 JJBEG = JJNXT + 1
656 JJNXT = MIN( JJNXT+NBV, JJEND )
657 GO TO 50
658 END IF
659*
660 ELSE
661*
662* WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
663* triangular, zeroes upper triangular part of V2.
664*
665 II = IIV
666 CALL INFOG1L( JV+M-K, NBV, NPCOL, MYCOL,
667 $ DESCV( CSRC_ ), JJ, ILASTCOL )
668 IOFF = MOD( JV+M-K-1, NBV )
669 KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL )
670.EQ. IF( MYCOLILASTCOL )
671 $ KQ = KQ - IOFF
672 MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL )
673 ILEFT = MYDIST * NBV - IOFF
674 IRIGHT = MIN( ILEFT+NBV, K )
675 ILEFT = MIN( MAX( 0, ILEFT ), K )
676*
677 60 CONTINUE
678.LE. IF( II( IIV+K-1 ) ) THEN
679 WIDE = IRIGHT - ILEFT
680 CALL CLASET( 'all', ILEFT-II+IIV, KQ, ZERO, ZERO,
681 $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW )
682 CALL CLASET( 'upper', WIDE, KQ, ZERO, ONE,
683 $ WORK( IPW1+ILEFT+(JJ-JJV)*LW ), LW )
684 KQ = MAX( 0, KQ - WIDE )
685 II = IIV + IRIGHT
686 JJ = JJ + WIDE
687 MYDIST = MYDIST + NPCOL
688 ILEFT = MYDIST * NBV - IOFF
689 IRIGHT = MIN( ILEFT + NBV, K )
690 ILEFT = MIN( ILEFT, K )
691 GO TO 60
692 END IF
693 END IF
694 END IF
695*
696* WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K
697*
698 CALL PBCTRAN( ICTXT, 'rowwise', 'conjugate transpose', K,
699 $ M+ICOFFV, NBV, WORK( IPW ), LW, ZERO,
700 $ WORK( IPV ), LV, IVROW, IVCOL, ICROW, -1,
701 $ WORK( IPT ) )
702*
703* WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K
704*
705.EQ. IF( MYROWICROW )
706 $ IPV = IPV + IROFFC
707*
708* WORK( IPW ) becomes NQC x K = C( IOFFC )' * V'
709* WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K
710*
711 LW = MAX( 1, NQC )
712*
713.GT. IF( MPC0 ) THEN
714 CALL CGEMM( 'conjugate transpose', 'no transpose', NQC,
715 $ K, MPC, ONE, C( IOFFC ), LDC, WORK( IPV ),
716 $ LV, ZERO, WORK( IPW ), LW )
717 ELSE
718 CALL CLASET( 'all', NQC, K, ZERO, ZERO, WORK( IPW ), LW )
719 END IF
720*
721 CALL CGSUM2D( ICTXT, 'columnwise', ' ', NQC, K, WORK( IPW ),
722 $ LW, IVROW, MYCOL )
723*
724* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
725*
726.EQ. IF( MYROWIVROW ) THEN
727.EQ. IF( MYCOLIVCOL ) THEN
728*
729* Broadcast the block reflector to the other columns.
730*
731 CALL CTRBS2D( ICTXT, 'rowwise', ' ', UPLO, 'non unit',
732 $ K, K, T, MBV )
733 ELSE
734 CALL CTRBR2D( ICTXT, 'rowwise', ' ', UPLO, 'non unit',
735 $ K, K, T, MBV, MYROW, IVCOL )
736 END IF
737 CALL CTRMM( 'right', UPLO, TRANST, 'non unit', NQC, K,
738 $ ONE, T, MBV, WORK( IPW ), LW )
739*
740 CALL CGEBS2D( ICTXT, 'columnwise', ' ', NQC, K,
741 $ WORK( IPW ), LW )
742 ELSE
743 CALL CGEBR2D( ICTXT, 'columnwise', ' ', NQC, K,
744 $ WORK( IPW ), LW, IVROW, MYCOL )
745 END IF
746*
747* C C - V' * W'
748* C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
749* MPC x NQC MPC x K K x NQC
750*
751 CALL CGEMM( 'no transpose', 'conjugate transpose', MPC, NQC,
752 $ K, -ONE, WORK( IPV ), LV, WORK( IPW ), LW, ONE,
753 $ C( IOFFC ), LDC )
754*
755 ELSE
756*
757* Form Q*sub( C ) or Q'*sub( C )
758*
759* Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC
760* WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC
761* WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )'
762*
763 IPV = 1
764 IPW = IPV + K * NQC
765 LV = MAX( 1, K )
766 LW = MAX( 1, MPC )
767*
768* Broadcast V to the other process rows.
769*
770 CALL PB_TOPGET( ICTXT, 'broadcast', 'columnwise', COLBTOP )
771.EQ. IF( MYROWIVROW ) THEN
772 CALL CGEBS2D( ICTXT, 'columnwise', COLBTOP, K, NQC,
773 $ V( IOFFV ), LDV )
774.EQ. IF( MYCOLIVCOL )
775 $ CALL CTRBS2D( ICTXT, 'columnwise', COLBTOP, UPLO,
776 $ 'non unit', K, K, T, MBV )
777 CALL CLAMOV( 'all', K, NQC, V( IOFFV ), LDV, WORK( IPV ),
778 $ LV )
779 ELSE
780 CALL CGEBR2D( ICTXT, 'columnwise', COLBTOP, K, NQC,
781 $ WORK( IPV ), LV, IVROW, MYCOL )
782.EQ. IF( MYCOLIVCOL )
783 $ CALL CTRBR2D( ICTXT, 'columnwise', COLBTOP, UPLO,
784 $ 'non unit', K, K, T, MBV, IVROW, MYCOL )
785 END IF
786*
787 IF( FORWARD ) THEN
788*
789* WORK(IPW) = ( V1 V2 ) where V1 is unit upper
790* triangular, zeroes lower triangular part of V1
791*
792 MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL )
793 ILEFT = MAX( 0, MYDIST * NBV - ICOFFV )
794 JJBEG = JJV
795 JJEND = JJV + NQC - 1
796 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND )
797*
798 70 CONTINUE
799.GT. IF( ( K-ILEFT )0 ) THEN
800 CALL CLASET( 'lower', K-ILEFT, JJNXT-JJBEG+1, ZERO,
801 $ ONE, WORK( IPV+ILEFT+(JJBEG-JJV)*LV ),
802 $ LV )
803 MYDIST = MYDIST + NPCOL
804 ILEFT = MYDIST * NBV - ICOFFV
805 JJBEG = JJNXT + 1
806 JJNXT = MIN( JJNXT+NBV, JJEND )
807 GO TO 70
808 END IF
809*
810 ELSE
811*
812* WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
813* triangular, zeroes upper triangular part of V2.
814*
815 II = IIV
816 CALL INFOG1L( JV+N-K, NBV, NPCOL, MYCOL, DESCV( CSRC_ ),
817 $ JJ, ILASTCOL )
818 IOFF = MOD( JV+N-K-1, NBV )
819 KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL )
820.EQ. IF( MYCOLILASTCOL )
821 $ KQ = KQ - IOFF
822 MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL )
823 ILEFT = MYDIST * NBV - IOFF
824 IRIGHT = MIN( ILEFT+NBV, K )
825 ILEFT = MIN( MAX( 0, ILEFT ), K )
826*
827 80 CONTINUE
828.LE. IF( II( IIV+K-1 ) ) THEN
829 WIDE = IRIGHT - ILEFT
830 CALL CLASET( 'all', ILEFT-II+IIV, KQ, ZERO, ZERO,
831 $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV )
832 CALL CLASET( 'upper', WIDE, KQ, ZERO, ONE,
833 $ WORK( IPV+ILEFT+(JJ-JJV)*LV ), LV )
834 KQ = MAX( 0, KQ - WIDE )
835 II = IIV + IRIGHT
836 JJ = JJ + WIDE
837 MYDIST = MYDIST + NPCOL
838 ILEFT = MYDIST * NBV - IOFF
839 IRIGHT = MIN( ILEFT + NBV, K )
840 ILEFT = MIN( ILEFT, K )
841 GO TO 80
842 END IF
843*
844 END IF
845*
846* WORK( IPV ) is K x NQC = V = V( IOFFV )
847* WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K
848*
849.GT. IF( NQC0 ) THEN
850 CALL CGEMM( 'no transpose', 'conjugate transpose', MPC,
851 $ K, NQC, ONE, C( IOFFC ), LDC, WORK( IPV ),
852 $ LV, ZERO, WORK( IPW ), LW )
853 ELSE
854 CALL CLASET( 'all', MPC, K, ZERO, ZERO, WORK( IPW ), LW )
855 END IF
856*
857 CALL CGSUM2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
858 $ LW, MYROW, IVCOL )
859*
860* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
861*
862.EQ. IF( MYCOLIVCOL ) THEN
863 CALL CTRMM( 'right', UPLO, TRANS, 'non unit', MPC, K,
864 $ ONE, T, MBV, WORK( IPW ), LW )
865 CALL CGEBS2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
866 $ LW )
867 ELSE
868 CALL CGEBR2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
869 $ LW, MYROW, IVCOL )
870 END IF
871*
872* C C - W * V
873* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
874* MPC x NQC MPC x K K x NQC
875*
876 CALL CGEMM( 'no transpose', 'no transpose', MPC, NQC, K,
877 $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
878 $ C( IOFFC ), LDC )
879*
880 END IF
881*
882 END IF
883*
884 RETURN
885*
886* End of PCLARFB
887*
888 END
subroutine claset(uplo, m, n, alpha, beta, a, lda)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition claset.f:106
subroutine ctrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
CTRMM
Definition ctrmm.f:177
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:187
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine cgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1062
subroutine cgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1103
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
subroutine pbctran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
Definition pbctran.f:3
subroutine pclarfb(side, trans, direct, storev, m, n, k, v, iv, jv, descv, t, c, ic, jc, descc, work)
Definition pclarfb.f:3