OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
dlr_core.F File Reference

Go to the source code of this file.

Modules

module  dmumps_lr_core

Functions/Subroutines

subroutine dmumps_lr_core::init_lrb (lrb_out, k, m, n, islr)
subroutine dmumps_lr_core::is_front_blr_candidate (inode, niv, nfront, nass, blron, k489, k490, k491, k492, k20, k60, idad, k38, lrstatus, n, lrgroups)
subroutine dmumps_lr_core::alloc_lrb (lrb_out, k, m, n, islr, iflag, ierror, keep8)
subroutine dmumps_lr_core::alloc_lrb_from_acc (acc_lrb, lrb_out, k, m, n, loru, iflag, ierror, keep8)
subroutine dmumps_lr_core::regrouping2 (cut, npartsass, nass, npartscb, ncb, ibcksz, onlycb, k472)
subroutine dmumps_lr_core::dmumps_lrtrsm (a, la, poselt_local, nfront, lda, lrb, niv, sym, loru, iw, offset_iw)
subroutine dmumps_lr_core::dmumps_lrgemm_scaling (lrb, scaled, a, la, diag, ld_diag, iw2, poseltt, nfront, block, maxi_cluster)
subroutine dmumps_lr_core::dmumps_lrgemm4 (alpha, lrb1, lrb2, beta, a, la, poseltt, nfront, sym, iflag, ierror, midblk_compress, toleps, tol_opt, kpercent, rank, buildq, lua_activated, loru, lrb3, maxi_rank, maxi_cluster, diag, ld_diag, iw2, block)
subroutine dmumps_lr_core::dmumps_decompress_acc (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, nfront, niv, loru, count_flops)
subroutine dmumps_lr_core::dmumps_compress_fr_updates (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, nfront, niv, toleps, tol_opt, kpercent, buildq, loru, cb_compress)
subroutine dmumps_lr_core::dmumps_recompress_acc (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, nfront, niv, midblk_compress, toleps, tol_opt, kpercent_rmb, kpercent_lua, new_acc_rank)
recursive subroutine dmumps_lr_core::dmumps_recompress_acc_narytree (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, keep8, nfront, niv, midblk_compress, toleps, tol_opt, kpercent_rmb, kpercent_lua, k478, rank_list, pos_list, nb_nodes, level, acc_tmp)
subroutine dmumps_lr_core::dmumps_recompress_acc_v2 (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, nfront, niv, midblk_compress, toleps, tol_opt, kpercent_rmb, kpercent_lua, new_acc_rank)
subroutine dmumps_lr_core::max_cluster (cut, cut_size, maxi_cluster)
subroutine dmumps_lr_core::dmumps_get_lua_order (nb_blocks, order, rank, iwhandler, sym, fs_or_cb, i, j, frfr_updates, lbandslave_in, k474, blr_u_col)
subroutine dmumps_lr_core::dmumps_blr_asm_niv1 (a, la, posel1, nfront, nass1, iwhandler, son_iw, liw, lstk, nelim, k1, k2, sym, keep, keep8, opassw)
subroutine dmumps_truncated_rrqr (m, n, a, lda, jpvt, tau, work, ldw, rwork, toleps, tol_opt, rank, maxrank, info, islr)

Function/Subroutine Documentation

◆ dmumps_truncated_rrqr()

subroutine dmumps_truncated_rrqr ( integer m,
integer n,
double precision, dimension(lda,*) a,
integer lda,
integer, dimension(*) jpvt,
double precision, dimension(*) tau,
double precision, dimension(ldw,*) work,
integer ldw,
double precision, dimension(*) rwork,
double precision toleps,
integer tol_opt,
integer rank,
integer maxrank,
integer info,
logical islr )

Definition at line 1608 of file dlr_core.F.

1611C This routine computes a Rank-Revealing QR factorization of a dense
1612C matrix A. The factorization is truncated when the absolute value of
1613C a diagonal coefficient of the R factor becomes smaller than a
1614C prescribed threshold TOLEPS. The resulting partial Q and R factors
1615C provide a rank-k approximation of the input matrix A with accuracy
1616C TOLEPS.
1617C
1618C This routine is obtained by merging the LAPACK
1619C (http://www.netlib.org/lapack/) CGEQP3 and CLAQPS routines and by
1620C applying a minor modification to the outer factorization loop in
1621C order to stop computations as soon as possible when the required
1622C accuracy is reached.
1623C
1624C Copyright (c) 1992-2017 The University of Tennessee and The
1625C University of Tennessee Research Foundation. All rights reserved.
1626C Copyright (c) 2000-2017 The University of California Berkeley.
1627C All rights reserved.
1628C Copyright (c) 2006-2017 The University of Colorado Denver.
1629C All rights reserved.
1630C
1631C Redistribution and use in source and binary forms, with or without
1632C modification, are permitted provided that the following conditions
1633C are met:
1634C
1635C - Redistributions of source code must retain the above copyright
1636C notice, this list of conditions and the following disclaimer.
1637C
1638C - Redistributions in binary form must reproduce the above
1639C copyright notice, this list of conditions and the following
1640C disclaimer listed in this license in the documentation and/or
1641C other materials provided with the distribution.
1642C
1643C - Neither the name of the copyright holders nor the names of its
1644C contributors may be used to endorse or promote products derived from
1645C this software without specific prior written permission.
1646C
1647C The copyright holders provide no reassurances that the source code
1648C provided does not infringe any patent, copyright, or any other
1649C intellectual property rights of third parties. The copyright holders
1650C disclaim any liability to any recipient for claims brought against
1651C recipient by any third party for infringement of that parties
1652C intellectual property rights.
1653C
1654C THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1655C "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1656C LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1657C A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1658C OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1659C SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1660C LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1661C DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1662C THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1663C (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1664C OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1665C
1666 IMPLICIT NONE
1667C
1668 INTEGER :: INFO, LDA, LDW, M, N, RANK, MAXRANK
1669C TOL_OPT controls the tolerance option used
1670C >0 => use 2-norm (||.||_X = ||.||_2)
1671C <0 => use Frobenius-norm (||.||_X = ||.||_F)
1672C Furthermore, depending on abs(TOL_OPT):
1673C 1 => absolute: ||B_{I(k+1:end),J(k+1:end)}||_X <= TOLEPS
1674C 2 => relative to 2-norm of the compressed block:
1675C ||B_{I(k+1:end),J(k+1:end)}||_X <= TOLEPS*||B_{I,J}||_2
1676C 3 => relative to the max of the 2-norms of the row and column diagonal blocks
1677C ||B_{I(k+1:end),J{k+1:end}}||_X <= TOLEPS*max(||B_{I,I}||_2,||B_{J,J}||_2)
1678C 4 => relative to the sqrt of product of the 2-norms of the row and column diagonal blocks
1679C ||B_{I(k+1:end),J{k+1:end}}||_X <= TOLEPS*sqrt(||B_{I,I}||_2*||B_{J,J}||_2)
1680 INTEGER :: TOL_OPT
1681 DOUBLE PRECISION :: TOLEPS
1682 INTEGER :: JPVT(*)
1683 DOUBLE PRECISION :: RWORK(*)
1684 DOUBLE PRECISION :: A(LDA,*), TAU(*)
1685 DOUBLE PRECISION :: WORK(LDW,*)
1686 LOGICAL :: ISLR
1687 DOUBLE PRECISION :: TOLEPS_EFF, TRUNC_ERR
1688 INTEGER, PARAMETER :: INB=1, inbmin=2
1689 INTEGER :: J, JB, MINMN, NB
1690 INTEGER :: OFFSET, ITEMP
1691 INTEGER :: LSTICC, PVT, K, RK
1692 DOUBLE PRECISION :: TEMP, TEMP2, TOL3Z
1693 DOUBLE PRECISION :: AKK
1694 LOGICAL INADMISSIBLE
1695 DOUBLE PRECISION, PARAMETER :: RZERO=0.0d+0, rone=1.0d+0
1696 DOUBLE PRECISION :: ZERO
1697 DOUBLE PRECISION :: ONE
1698 parameter( one = 1.0d+0 )
1699 parameter( zero = 0.0d+0 )
1700 DOUBLE PRECISION :: dlamch
1701 INTEGER :: ilaenv, idamax
1702 EXTERNAL :: idamax, dlamch
1703 EXTERNAL dgeqrf, dormqr, xerbla
1704 EXTERNAL ilaenv
1705 EXTERNAL dgemm, dgemv, dlarfg, dswap
1706 DOUBLE PRECISION, EXTERNAL :: dnrm2
1707 info = 0
1708 islr = .false.
1709 IF( m.LT.0 ) THEN
1710 info = -1
1711 ELSE IF( n.LT.0 ) THEN
1712 info = -2
1713 ELSE IF( lda.LT.max( 1, m ) ) THEN
1714 info = -4
1715 END IF
1716 IF( info.EQ.0 ) THEN
1717 IF( ldw.LT.n ) THEN
1718 info = -8
1719 END IF
1720 END IF
1721 IF( info.NE.0 ) THEN
1722 WRITE(*,999) -info
1723 RETURN
1724 END IF
1725 minmn = min(m,n)
1726 IF( minmn.EQ.0 ) THEN
1727 rank = 0
1728 RETURN
1729 END IF
1730 nb = ilaenv( inb, 'CGEQRF', ' ', m, n, -1, -1 )
1731 SELECT CASE(abs(tol_opt))
1732 CASE(1)
1733 toleps_eff = toleps
1734 CASE(2)
1735C TOLEPS_EFF will be computed at step K=1 below
1736 CASE DEFAULT
1737 write(*,*) 'Internal error in DMUMPS_TRUNCATED_RRQR: TOL_OPT =',
1738 & tol_opt
1739 CALL mumps_abort()
1740 END SELECT
1741 toleps_eff = toleps
1742C
1743C Avoid pointers (and TARGET attribute on RWORK/WORK)
1744C because of implicit interface. An implicit interface
1745C is needed to avoid intermediate array copies
1746C VN1 => RWORK(1:N)
1747C VN2 => RWORK(N+1:2*N)
1748C AUXV => WORK(1:LDW,1:1)
1749C F => WORK(1:LDW,2:NB+1)
1750C LDF = LDW
1751* Initialize partial column norms. The first N elements of work
1752* store the exact column norms.
1753 DO j = 1, n
1754C VN1( J ) = dnrm2( M, A( 1, J ), 1 )
1755 rwork( j ) = dnrm2( m, a( 1, j ), 1 )
1756C VN2( J ) = VN1( J )
1757 rwork( n + j ) = rwork( j )
1758 jpvt(j) = j
1759 END DO
1760 IF (tol_opt.LT.0) THEN
1761C Compute TRUNC_ERR for first step
1762C TRUNC_ERR = dnrm2( N, VN1( 1 ), 1 )
1763 trunc_err = dnrm2( n, rwork( 1 ), 1 )
1764 ENDIF
1765 offset = 0
1766 tol3z = sqrt(dlamch('Epsilon'))
1767 DO
1768 jb = min(nb,minmn-offset)
1769 lsticc = 0
1770 k = 0
1771 DO
1772 IF(k.EQ.jb) EXIT
1773 k = k+1
1774 rk = offset+k
1775C PVT = ( RK-1 ) + IDAMAX( N-RK+1, VN1( RK ), 1 )
1776 pvt = ( rk-1 ) + idamax( n-rk+1, rwork( rk ), 1 )
1777 IF (rk.EQ.1) THEN
1778C IF (abs(TOL_OPT).EQ.2) TOLEPS_EFF = VN1(PVT)*TOLEPS
1779 IF (abs(tol_opt).EQ.2) toleps_eff = rwork(pvt)*toleps
1780 ENDIF
1781 IF (tol_opt.GT.0) THEN
1782C TRUNC_ERR = VN1(PVT)
1783 trunc_err = rwork(pvt)
1784C ELSE
1785C TRUNC_ERR has been already computed at previous step
1786 ENDIF
1787 IF(trunc_err.LT.toleps_eff) THEN
1788 rank = rk-1
1789 islr = .true.
1790 RETURN
1791 ENDIF
1792 inadmissible = (rk.GT.maxrank)
1793 IF (inadmissible) THEN
1794 rank = rk
1795 info = rk
1796 islr = .false.
1797 RETURN
1798 END IF
1799 IF( pvt.NE.rk ) THEN
1800 CALL dswap( m, a( 1, pvt ), 1, a( 1, rk ), 1 )
1801c CALL dswap( K-1, F( PVT-OFFSET, 1 ), LDF,
1802c & F( K, 1 ), LDF )
1803 CALL dswap( k-1, work( pvt-offset, 2 ), ldw,
1804 & work( k, 2 ), ldw )
1805 itemp = jpvt(pvt)
1806 jpvt(pvt) = jpvt(rk)
1807 jpvt(rk) = itemp
1808C VN1(PVT) = VN1(RK)
1809C VN2(PVT) = VN2(RK)
1810 rwork(pvt) = rwork(rk)
1811 rwork(n+pvt) = rwork(n+rk)
1812 END IF
1813* Apply previous Householder reflectors to column K:
1814* A(RK:M,RK) := A(RK:M,RK) - A(RK:M,OFFSET+1:RK-1)*F(K,1:K-1)**H.
1815 IF( k.GT.1 ) THEN
1816 CALL dgemv( 'No transpose', m-rk+1, k-1, -one,
1817C & A(RK,OFFSET+1), LDA, F(K,1), LDF,
1818 & a(rk,offset+1), lda, work(k,2), ldw,
1819 & one, a(rk,rk), 1 )
1820 END IF
1821* Generate elementary reflector H(k).
1822 IF( rk.LT.m ) THEN
1823 CALL dlarfg( m-rk+1, a(rk,rk), a(rk+1,rk), 1, tau(rk) )
1824 ELSE
1825 CALL dlarfg( 1, a(rk,rk), a(rk,rk), 1, tau(rk) )
1826 END IF
1827 akk = a(rk,rk)
1828 a(rk,rk) = one
1829* Compute Kth column of F:
1830* F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**H*A(RK:M,K).
1831 IF( rk.LT.n ) THEN
1832 CALL dgemv( 'Transpose', m-rk+1, n-rk, tau(rk),
1833 & a(rk,rk+1), lda, a(rk,rk), 1, zero,
1834C & F( K+1, K ), 1 )
1835 & work( k+1, k+1 ), 1 )
1836 END IF
1837* Padding F(1:K,K) with zeros.
1838 DO j = 1, k
1839C F( J, K ) = ZERO
1840 work( j, k+1 ) = zero
1841 END DO
1842* Incremental updating of F:
1843* F(1:N,K) := F(1:N-OFFSET,K) -
1844* tau(RK)*F(1:N,1:K-1)*A(RK:M,OFFSET+1:RK-1)**H*A(RK:M,RK).
1845 IF( k.GT.1 ) THEN
1846 CALL dgemv( 'Transpose', m-rk+1, k-1, -tau(rk),
1847 & a(rk,offset+1), lda, a(rk,rk), 1, zero,
1848 & work(1,1), 1 )
1849C & AUXV(1,1), 1 )
1850 CALL dgemv( 'No transpose', n-offset, k-1, one,
1851 & work(1,2), ldw, work(1,1), 1, one, work(1,k+1), 1 )
1852C & F(1,1), LDF, AUXV(1,1), 1, ONE, F(1,K), 1 )
1853 END IF
1854* Update the current row of A:
1855* A(RK,RK+1:N) := A(RK,RK+1:N) - A(RK,OFFSET+1:RK)*F(K+1:N,1:K)**H.
1856 IF( rk.LT.n ) THEN
1857C CALL dgemv( 'No Transpose', N-RK, K, -ONE, F( K+1, 1 ),
1858 CALL dgemv( 'No Transpose', n-rk, k, -one, work( k+1,2 ),
1859 & ldw,
1860 & a( rk, offset+1 ), lda, one, a( rk, rk+1 ), lda )
1861 END IF
1862* Update partial column norms.
1863*
1864 IF( rk.LT.minmn ) THEN
1865 DO j = rk + 1, n
1866C IF( VN1( J ).NE.RZERO ) THEN
1867 IF( rwork( j ).NE.rzero ) THEN
1868*
1869* NOTE: The following 4 lines follow from the analysis in
1870* Lapack Working Note 176.
1871*
1872C TEMP = ABS( A( RK, J ) ) / VN1( J )
1873 temp = abs( a( rk, j ) ) / rwork( j )
1874 temp = max( rzero, ( rone+temp )*( rone-temp ) )
1875C TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2
1876 temp2 = temp*( rwork( j ) / rwork( n+j ) )**2
1877 IF( temp2 .LE. tol3z ) THEN
1878C VN2( J ) = dble( LSTICC )
1879 rwork( n+j ) = dble( lsticc )
1880 lsticc = j
1881 ELSE
1882C VN1( J ) = VN1( J )*SQRT( TEMP )
1883 rwork( j ) = rwork( j )*sqrt( temp )
1884 END IF
1885 END IF
1886 END DO
1887 END IF
1888 a( rk, rk ) = akk
1889 IF (lsticc.NE.0) EXIT
1890 IF (tol_opt.LT.0) THEN
1891C Compute TRUNC_ERR for next step
1892C TRUNC_ERR = dnrm2( N-RK, VN1( RK+1 ), 1 )
1893 trunc_err = dnrm2( n-rk, rwork( rk+1 ), 1 )
1894 ENDIF
1895 END DO
1896* Apply the block reflector to the rest of the matrix:
1897* A(RK+1:M,RK+1:N) := A(RK+1:M,RK+1:N) -
1898* A(RK+1:M,OFFSET+1:RK)*F(K+1:N-OFFSET,1:K)**H.
1899 IF( rk.LT.min(n,m) ) THEN
1900 CALL dgemm( 'No transpose', 'Transpose', m-rk,
1901 & n-rk, k, -one, a(rk+1,offset+1), lda,
1902C & F(K+1,1), LDF, ONE, A(RK+1,RK+1), LDA )
1903 & work(k+1,2), ldw, one, a(rk+1,rk+1), lda )
1904 END IF
1905* Recomputation of difficult columns.
1906 DO WHILE( lsticc.GT.0 )
1907C ITEMP = NINT( VN2( LSTICC ) )
1908 itemp = nint( rwork( n + lsticc ) )
1909C VN1( LSTICC ) = dnrm2( M-RK, A( RK+1, LSTICC ), 1 )
1910 rwork( lsticc ) = dnrm2( m-rk, a( rk+1, lsticc ), 1 )
1911*
1912* NOTE: The computation of RWORK( LSTICC ) relies on the fact that
1913* SNRM2 does not fail on vectors with norm below the value of
1914* SQRT(DLAMCH('S'))
1915*
1916C VN2( LSTICC ) = VN1( LSTICC )
1917 rwork( n + lsticc ) = rwork( lsticc )
1918 lsticc = itemp
1919 END DO
1920 IF(rk.GE.minmn) EXIT
1921 offset = rk
1922 IF (tol_opt.LT.0) THEN
1923C Compute TRUNC_ERR for next step
1924C TRUNC_ERR = dnrm2( N-RK, VN1( RK+1 ), 1 )
1925 trunc_err = dnrm2( n-rk, rwork( rk+1 ), 1 )
1926 ENDIF
1927 END DO
1928 rank = rk
1929 islr = .NOT.(rk.GT.maxrank)
1930 RETURN
1931 999 FORMAT ('On entry to DMUMPS_TRUNCATED_RRQR, parameter number',
1932 & i2,' had an illegal value')
#define mumps_abort
Definition VE_Metis.h:25
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
integer function idamax(n, dx, incx)
IDAMAX
Definition idamax.f:71
subroutine dgeqrf(m, n, a, lda, tau, work, lwork, info)
DGEQRF
Definition dgeqrf.f:146
subroutine dlarfg(n, alpha, x, incx, tau)
DLARFG generates an elementary reflector (Householder matrix).
Definition dlarfg.f:106
subroutine dormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
DORMQR
Definition dormqr.f:167
subroutine dswap(n, dx, incx, dy, incy)
DSWAP
Definition dswap.f:82
subroutine dgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
DGEMV
Definition dgemv.f:156
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187
real(wp) function dnrm2(n, x, incx)
DNRM2
Definition dnrm2.f90:89
double precision function dlamch(cmach)
DLAMCH
Definition dlamch.f:69
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21