159 $ IPIV2, WORK, LWORK, INFO )
169 INTEGER N, LDA, LTB, LWORK, INFO
172 INTEGER IPIV( * ), IPIV2( * )
173 COMPLEX*16 A( LDA, * ), TB( * ), WORK( * )
178 COMPLEX*16 CZERO, CONE
179 parameter( czero = ( 0.0d+0, 0.0d+0 ),
180 $ cone = ( 1.0d+0, 0.0d+0 ) )
183 LOGICAL UPPER, TQUERY, WQUERY
184 INTEGER I, J, K, I1, I2, TD
185 INTEGER LDTB, NB, KB, JB, NT, IINFO
191 EXTERNAL lsame, ilaenv
205 upper = lsame( uplo,
'U' )
206 wquery = ( lwork.EQ.-1 )
207 tquery = ( ltb.EQ.-1 )
208 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
210 ELSE IF( n.LT.0 )
THEN
212 ELSE IF( lda.LT.
max( 1, n ) )
THEN
214 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery )
THEN
216 ELSE IF ( lwork .LT. n .AND. .NOT.wquery )
THEN
221 CALL xerbla(
'ZSYTRF_AA_2STAGE', -info )
227 nb = ilaenv( 1,
'ZSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
236 IF( tquery .OR. wquery )
THEN
249 IF( ldtb .LT. 3*nb+1 )
THEN
252 IF( lwork .LT. nb*n )
THEN
286 IF( i .EQ. (j-1) )
THEN
291 CALL zgemm(
'NoTranspose',
'NoTranspose',
293 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
294 $ a( (i-1)*nb+1, j*nb+1 ), lda,
295 $ czero, work( i*nb+1 ), n )
298 IF( i .EQ. (j-1) )
THEN
303 CALL zgemm(
'NoTranspose',
'NoTranspose',
305 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
307 $ a( (i-2)*nb+1, j*nb+1 ), lda,
308 $ czero, work( i*nb+1 ), n )
314 CALL zlacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
315 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
318 CALL zgemm(
'Transpose',
'NoTranspose',
320 $ -cone, a( 1, j*nb+1 ), lda,
322 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
324 CALL zgemm(
'Transpose',
'NoTranspose',
326 $ cone, a( (j-1)*nb+1, j*nb+1 ), lda,
327 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
328 $ czero, work( 1 ), n )
329 CALL zgemm(
'NoTranspose',
'NoTranspose',
331 $ -cone, work( 1 ), n,
332 $ a( (j-2)*nb+1, j*nb+1 ), lda,
333 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
340 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
341 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
348 CALL ztrsm(
'L',
'U',
'T',
'N', kb, kb, cone,
349 $ a( (j-1)*nb+1, j*nb+1 ), lda,
350 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
351 CALL ztrsm(
'R',
'U',
'N',
'N', kb, kb, cone,
352 $ a( (j-1)*nb+1, j*nb+1 ), lda,
353 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
362 CALL zgemm(
'NoTranspose',
'NoTranspose',
364 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
365 $ a( (j-1)*nb+1, j*nb+1 ), lda,
366 $ czero, work( j*nb+1 ), n )
368 CALL zgemm(
'NoTranspose',
'NoTranspose',
370 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
372 $ a( (j-2)*nb+1, j*nb+1 ), lda,
373 $ czero, work( j*nb+1 ), n )
378 CALL zgemm(
'Transpose',
'NoTranspose',
379 $ nb, n-(j+1)*nb, j*nb,
380 $ -cone, work( nb+1 ), n,
381 $ a( 1, (j+1)*nb+1 ), lda,
382 $ cone, a( j*nb+1, (j+1)*nb+1 ), lda )
388 CALL zcopy( n-(j+1)*nb,
389 $ a( j*nb+k, (j+1)*nb+1 ), lda,
390 $ work( 1+(k-1)*n ), 1 )
395 CALL zgetrf( n-(j+1)*nb, nb,
397 $ ipiv( (j+1)*nb+1 ), iinfo )
405 CALL zcopy( n-(j+1)*nb,
406 $ work( 1+(k-1)*n ), 1,
407 $ a( j*nb+k, (j+1)*nb+1 ), lda )
412 kb =
min(nb, n-(j+1)*nb)
413 CALL zlaset(
'Full', kb, nb, czero, czero,
414 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
415 CALL zlacpy(
'Upper', kb, nb,
417 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
419 CALL ztrsm(
'R',
'U',
'N',
'U', kb, nb, cone,
420 $ a( (j-1)*nb+1, j*nb+1 ), lda,
421 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
429 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
430 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
433 CALL zlaset( 'lower
', KB, NB, CZERO, CONE,
434 $ A( J*NB+1, (J+1)*NB+1), LDA )
440 IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB
443 I2 = IPIV( (J+1)*NB+K )
446 CALL ZSWAP( K-1, A( (J+1)*NB+1, I1 ), 1,
447 $ A( (J+1)*NB+1, I2 ), 1 )
450 $ CALL ZSWAP( I2-I1-1, A( I1, I1+1 ), LDA,
454 $ CALL ZSWAP( N-I2, A( I1, I2+1 ), LDA,
455 $ A( I2, I2+1 ), LDA )
458 A( I1, I1 ) = A( I2, I2 )
462 CALL ZSWAP( J*NB, A( 1, I1 ), 1,
483.EQ.
IF( I (J-1) ) THEN
488 CALL ZGEMM( 'notranspose
', 'transpose
',
490 $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
491 $ A( J*NB+1, (I-1)*NB+1 ), LDA,
492 $ CZERO, WORK( I*NB+1 ), N )
495.EQ.
IF( I (J-1) ) THEN
500 CALL ZGEMM( 'notranspose
', 'transpose
',
502 $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
504 $ A( J*NB+1, (I-2)*NB+1 ), LDA,
505 $ CZERO, WORK( I*NB+1 ), N )
511 CALL ZLACPY( 'lower
', KB, KB, A( J*NB+1, J*NB+1 ), LDA,
512 $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
515 CALL ZGEMM( 'notranspose
', 'notranspose
',
517 $ -CONE, A( J*NB+1, 1 ), LDA,
519 $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
521 CALL ZGEMM( 'notranspose
', 'notranspose
',
523 $ CONE, A( J*NB+1, (J-1)*NB+1 ), LDA,
524 $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1,
525 $ CZERO, WORK( 1 ), N )
526 CALL ZGEMM( 'notranspose
', 'transpose
',
528 $ -CONE, WORK( 1 ), N,
529 $ A( J*NB+1, (J-2)*NB+1 ), LDA,
530 $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
537 TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB )
538 $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB )
545 CALL ZTRSM( 'l
', 'l
', 'n
', 'n
', KB, KB, CONE,
546 $ A( J*NB+1, (J-1)*NB+1 ), LDA,
547 $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
548 CALL ZTRSM( 'r
', 'l
', 't
', 'n
', KB, KB, CONE,
549 $ A( J*NB+1, (J-1)*NB+1 ), LDA,
550 $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
557 TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB )
558 $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB )
568 CALL ZGEMM( 'notranspose
', 'transpose
',
570 $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1,
571 $ A( J*NB+1, (J-1)*NB+1 ), LDA,
572 $ CZERO, WORK( J*NB+1 ), N )
574 CALL ZGEMM( 'notranspose
', 'transpose
',
576 $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ),
578 $ A( J*NB+1, (J-2)*NB+1 ), LDA,
579 $ CZERO, WORK( J*NB+1 ), N )
584 CALL ZGEMM( 'notranspose
', 'notranspose
',
585 $ N-(J+1)*NB, NB, J*NB,
586 $ -CONE, A( (J+1)*NB+1, 1 ), LDA,
588 $ CONE, A( (J+1)*NB+1, J*NB+1 ), LDA )
593 CALL ZGETRF( N-(J+1)*NB, NB,
594 $ A( (J+1)*NB+1, J*NB+1 ), LDA,
595 $ IPIV( (J+1)*NB+1 ), IINFO )
602 KB = MIN(NB, N-(J+1)*NB)
603 CALL ZLASET( 'full
', KB, NB, CZERO, CZERO,
604 $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 )
605 CALL ZLACPY( 'upper
', KB, NB,
606 $ A( (J+1)*NB+1, J*NB+1 ), LDA,
607 $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 )
609 CALL ZTRSM( 'r
', 'l
', 't
', 'u
', KB, NB, CONE,
610 $ A( J*NB+1, (J-1)*NB+1 ), LDA,
611 $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 )
619 TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) =
620 $ TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB )
623 CALL ZLASET( 'upper
', KB, NB, CZERO, CONE,
624 $ A( (J+1)*NB+1, J*NB+1 ), LDA )
630 IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB
633 I2 = IPIV( (J+1)*NB+K )
636 CALL ZSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA,
637 $ A( I2, (J+1)*NB+1 ), LDA )
640 $ CALL ZSWAP( I2-I1-1, A( I1+1, I1 ), 1,
641 $ A( I2, I1+1 ), LDA )
644 $ CALL ZSWAP( N-I2, A( I2+1, I1 ), 1,
648 A( I1, I1 ) = A( I2, I2 )
652 CALL ZSWAP( J*NB, A( I1, 1 ), LDA,
667 CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO )