OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
csol_driver.F File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine cmumps_solve_driver (id)
subroutine cmumps_check_distrhs (idnloc_rhs, idlrhs_loc, nrhs, idirhs_loc, idrhs_loc, info)
subroutine cmumps_pp_solve ()

Function/Subroutine Documentation

◆ cmumps_check_distrhs()

subroutine cmumps_solve_driver::cmumps_check_distrhs ( integer, intent(in) idnloc_rhs,
integer, intent(in) idlrhs_loc,
integer, intent(in) nrhs,
integer, dimension (:), intent(in), pointer idirhs_loc,
complex, dimension (:), intent(in), pointer idrhs_loc,
integer, dimension(80), intent(inout) info )
private

Definition at line 5553 of file csol_driver.F.

5560C
5561C Purpose:
5562C =======
5563C
5564C Check distributed RHS format. We assume that
5565C the user has indicated that he/she provided
5566C a distributed RHS (KEEP(248)=-1). We also
5567C assume that the nb of RHS columns NRHS has
5568C been broadcasted to all processes. This
5569C routine should then be called on the workers.
5570C
5571C Arguments:
5572C =========
5573C
5574 INTEGER, INTENT( IN ) :: idNloc_RHS
5575 INTEGER, INTENT( IN ) :: idLRHS_loc
5576 INTEGER, INTENT( IN ) :: NRHS
5577#if defined(MUMPS_F2003)
5578 INTEGER, INTENT( IN ), POINTER :: idIRHS_loc (:)
5579 COMPLEX, INTENT( IN ), POINTER :: idRHS_loc (:)
5580#else
5581 INTEGER, POINTER :: idIRHS_loc (:)
5582 COMPLEX, POINTER :: idRHS_loc (:)
5583#endif
5584 INTEGER, INTENT( INOUT ) :: INFO(80)
5585C
5586C Local declarations:
5587C ==================
5588C
5589 INTEGER(8) :: REQSIZE8
5590C
5591C Executable statements:
5592C =====================
5593C
5594C Quick return if nothing on this proc
5595 IF (idnloc_rhs .LE. 0) RETURN
5596C Check for leading dimension
5597 IF (nrhs.NE.1) THEN
5598 IF ( idlrhs_loc .LT. idnloc_rhs) THEN
5599 info(1)=-55
5600 info(2)=idlrhs_loc
5601 RETURN
5602 ENDIF
5603 ENDIF
5604 IF (idnloc_rhs .GT. 0) THEN
5605C Check association and size of index array idIRHS_loc
5606 IF (.NOT. associated(idirhs_loc)) THEN
5607 id%INFO(1)=-22
5608 id%INFO(2)=17
5609 RETURN
5610 ELSE IF (size(idirhs_loc) .LT. idnloc_rhs) THEN
5611 info(1)=-22
5612 info(2)= 17
5613 RETURN
5614 ENDIF
5615C Check association and size of value array idRHS_loc
5616 IF (.NOT. associated(idrhs_loc)) THEN
5617 id%INFO(1)=-22
5618 id%INFO(2)=18
5619 RETURN
5620 ELSE
5621C Check size of array of values idRHS_loc
5622 reqsize8 = int(idlrhs_loc,8)*int(nrhs,8)
5623 & + int(-idlrhs_loc+idnloc_rhs,8)
5624#if defined(MUMPS_F2003)
5625 IF (size(idrhs_loc,kind=8) .LT. reqsize8) THEN
5626#else
5627 IF ( reqsize8 .LE. int(huge(idnloc_rhs),8) .AND.
5628 & size(idrhs_loc) .LT. int(reqsize8) ) THEN
5629C (Warning: this assumes that size(idRHS_loc)
5630C does not overflow)
5631#endif
5632 info(1)=-22
5633 info(2)=18
5634 RETURN
5635 ENDIF
5636 ENDIF
5637 ENDIF
5638 RETURN
initmumps id

◆ cmumps_pp_solve()

subroutine cmumps_solve_driver::cmumps_pp_solve
private

Definition at line 5640 of file csol_driver.F.

5641 IMPLICIT NONE
5642C
5643C Purpose:
5644C =======
5645C Scatter right-hand side, solve the system,
5646C and gather the solution on the host during
5647C post-processing.
5648C We use an internal subroutine to avoid code
5649C duplication without the complication of adding
5650C new parameters or local variables. All variables
5651C in this routine have the scope of CMUMPS_SOL_DRIVER.
5652C
5653C
5654 IF (kase .NE. 1 .AND. kase .NE. 2) THEN
5655 WRITE(*,*) "Internal error 1 in CMUMPS_PP_SOLVE"
5656 CALL mumps_abort()
5657 ENDIF
5658 IF ( id%MYID .eq. master ) THEN
5659C Define matrix B as follows:
5660C MTYPE=1 => B=A other values B=At
5661C The user asked to solve the system Bx=b
5662C
5663C THEN
5664C KASE = 1........ RW1 = INV(TRANSPOSE(B)) * RW1
5665C KASE = 2........ RW1 = INV(B) * RW1
5666 IF ( mtype .EQ. 1 ) THEN
5667 solvet = kase - 1
5668 ELSE
5669 solvet = kase
5670 END IF
5671C SOLVET= 1 -> solve A x = B, other values solve Atx=b
5672C We force SOLVET to have value either 0 or 1, in order
5673C to be able to test both values, and also, be able to
5674C test whether SOLVET = MTYPE or not.
5675 IF ( solvet.EQ.2 ) solvet = 0
5676 IF ( lscal ) THEN
5677 IF ( solvet .EQ. 1 ) THEN
5678C Apply rowscaling
5679 DO k = 1, id%N
5680 c_y( k ) = c_y( k ) * id%ROWSCA( k )
5681 END DO
5682 ELSE
5683C Apply column scaling
5684 DO k = 1, id%N
5685 c_y( k ) = c_y( k ) * id%COLSCA( k )
5686 END DO
5687 END IF
5688 END IF
5689 END IF ! MYID.EQ.MASTER
5690C ------------------------------
5691C Broadcast SOLVET to the slaves
5692C ------------------------------
5693 CALL mpi_bcast( solvet, 1, mpi_integer, master,
5694 & id%COMM, ierr)
5695C --------------------------------------------
5696C Scatter the right hand side C_Y on all procs
5697C --------------------------------------------
5698 IF ( .NOT.i_am_slave ) THEN
5699C -- Master not working
5700 CALL cmumps_scatter_rhs(id%NSLAVES,id%N, id%MYID,
5701 & id%COMM,
5702 & solvet, c_y(1), id%N, 1,
5703 & 1,
5704 & c_dummy, 1, 1,
5705 & idummy, 0,
5706 & jdummy, id%KEEP(1), id%KEEP8(1), id%PROCNODE_STEPS(1),
5707 & idummy, 1,
5708 & id%STEP(1),
5709 & id%ICNTL(1),id%INFO(1))
5710 ELSE
5711 IF (solvet.EQ.mtype) THEN
5712C POSINRHSCOMP_ROW is with respect to the
5713C original linear system (transposed or not)
5714 ptr_posinrhscomp_fwd => id%POSINRHSCOMP_ROW
5715 ELSE
5716C Transposed, use column indices of original
5717C system (ie, col indices of A or A^T)
5718 ptr_posinrhscomp_fwd => id%POSINRHSCOMP_COL
5719 ENDIF
5720 liw_passed = max( liw, 1 )
5721 CALL cmumps_scatter_rhs(id%NSLAVES,id%N, id%MYID,
5722 & id%COMM,
5723 & solvet, c_y(1), id%N, 1,
5724 & 1,
5725 & id%RHSCOMP(ibeg_rhscomp), ld_rhscomp, 1,
5726 & ptr_posinrhscomp_fwd(1), nb_fs_rhscomp_f,
5727C
5728 & id%PTLUST_S(1), id%KEEP(1), id%KEEP8(1),
5729 & id%PROCNODE_STEPS(1),
5730 & is(1), liw_passed,
5731 & id%STEP(1),
5732 & id%ICNTL(1),id%INFO(1))
5733 ENDIF
5734 IF (info(1).LT.0) GOTO 89
5735C
5736C Solve the system
5737C
5738 IF ( i_am_slave ) THEN
5739 liw_passed = max( liw, 1 )
5740 la_passed = max( la, 1_8 )
5741 IF (solvet.EQ.mtype) THEN
5742 ptr_posinrhscomp_fwd => id%POSINRHSCOMP_ROW
5743 ptr_posinrhscomp_bwd => id%POSINRHSCOMP_COL
5744 ELSE
5745 ptr_posinrhscomp_fwd => id%POSINRHSCOMP_COL
5746 ptr_posinrhscomp_bwd => id%POSINRHSCOMP_ROW
5747 ENDIF
5748 from_pp=.true.
5749 nbsparse_loc = .false.
5750 CALL cmumps_sol_c(id%root, id%N, id%S(1), la_passed, id%IS(1),
5751 & liw_passed,work_wcb(1),lwcb8_sol_c,iwcb,liwcb,nbrhs_eff,id%NA(1),
5752 & id%LNA,id%NE_STEPS(1),srw3,solvet,icntl(1),from_pp,id%STEP(1),
5753 & id%FRERE_STEPS(1),id%DAD_STEPS(1),id%FILS(1),id%PTLUST_S(1),
5754 & id%PTRFAC(1), iwk_solve(1), liwk_solve, ptracb, liwk_ptracb,
5755 & id%PROCNODE_STEPS(1), id%NSLAVES, info(1), keep(1), keep8(1),
5756 & id%DKEEP(1),id%COMM_NODES,id%MYID,id%MYID_NODES, bufr(1), lbufr,
5757 & lbufr_bytes, id%ISTEP_TO_INIV2(1), id%TAB_POS_IN_PERE(1,1),
5758C Next 3 arguments are not used in this call
5759 & ibeg_root_def,iend_root_def,iroot_def_rhs_col1, ptr_rhs_root(1),
5760 & lptr_rhs_root, size_root, master_root, id%RHSCOMP(ibeg_rhscomp),
5761 & ld_rhscomp,ptr_posinrhscomp_fwd(1),ptr_posinrhscomp_bwd(1),
5762 & 1,1,1,1, idummy, 1, jdummy, kdummy, 1, ldummy, 1, mdummy, 1,1,
5763 & nbsparse_loc, ptr_rhs_bounds(1), lptr_rhs_bounds
5764 & , id%IPOOL_B_L0_OMP(1), id%LPOOL_B_L0_OMP, id%IPOOL_A_L0_OMP(1),
5765 & id%LPOOL_A_L0_OMP, id%L_VIRT_L0_OMP, id%VIRT_L0_OMP(1),
5766 & id%L_PHYS_L0_OMP, id%PHYS_L0_OMP(1), id%PERM_L0_OMP(1),
5767 & id%PTR_LEAFS_L0_OMP(1), id%L0_OMP_MAPPING(1), id%LL0_OMP_MAPPING,
5768 & id%L0_OMP_FACTORS(1), id%LL0_OMP_FACTORS
5769 & )
5770 END IF
5771C ------------------
5772C Change error codes
5773C ------------------
5774 IF (info(1).eq.-2) info(1)=-12
5775 IF (info(1).eq.-3) info(1)=-15
5776C
5777 IF (info(1) .GE. 0) THEN
5778C We need a workspace of minimal size KEEP(247)
5779C in order to unpack pieces of the solution during
5780C CMUMPS_GATHER_SOLUTION below
5781C - Avoid allocation if error already occurred.
5782C - DEALLOCATE called after GATHER_SOLUTION
5783C CWORK not needed for AM1
5784 ALLOCATE( cwork(max(max(keep(247),keep(246)),1)),
5785 & stat=allocok)
5786 IF (allocok > 0) THEN
5787 info(1)=-13
5788 info(2)=max(max(keep(247),keep(246)),1)
5789 ENDIF
5790 ENDIF
5791C -------------------------
5792C Propagate possible errors
5793C -------------------------
5794 89 CALL mumps_propinfo( icntl(1), info(1),
5795 & id%COMM,id%MYID)
5796C
5797C Return in case of error.
5798 IF (info(1).LT.0) RETURN
5799C -------------------------------
5800C Assemble the solution on master
5801C -------------------------------
5802C (Note: currently, if this part of code is executed,
5803C then necessarily NBRHS_EFF = 1)
5804C
5805C === GATHER and SCALE solution ==============
5806C
5807 IF ((id%MYID.NE.master).OR. .NOT.lscal) THEN
5808 pt_scaling => dummy_scal
5809 ELSE
5810 IF (solvet.EQ.1) THEN
5811 pt_scaling => id%COLSCA
5812 ELSE
5813 pt_scaling => id%ROWSCA
5814 ENDIF
5815 ENDIF
5816 liw_passed = max( liw, 1 )
5817C Solution computed during CMUMPS_SOL_C has been stored
5818C in id%RHSCOMP and is gathered on the master in C_Y
5819 IF ( .NOT. i_am_slave ) THEN
5820C I did not participate to computing part of the solution
5821C (id%RHSCOMP not set/allocate) : receive solution, store
5822C it and scale it.
5823 CALL cmumps_gather_solution(id%NSLAVES,id%N,
5824 & id%MYID, id%COMM, nbrhs_eff,
5825 & solvet, c_y, id%N, nbrhs_eff, 1,
5826 & jdummy, id%KEEP(1),id%KEEP8(1), id%PROCNODE_STEPS(1),
5827 & idummy, 1,
5828 & id%STEP(1), bufr(1), lbufr, lbufr_bytes,
5829 & cwork(1), size(cwork),
5830 & lscal, pt_scaling(1), size(pt_scaling),
5831! RHSCOMP not on non-working master
5832 & c_dummy, 1 , 1, idummy, 1,
5833! for sparse permuted RHS on host
5834 & perm_rhs, size(perm_rhs)
5835 & )
5836 ELSE
5837 CALL cmumps_gather_solution(id%NSLAVES,id%N,
5838 & id%MYID, id%COMM, nbrhs_eff,
5839 & solvet, c_y, id%N, nbrhs_eff, 1,
5840 & id%PTLUST_S(1), id%KEEP(1),id%KEEP8(1),
5841 & id%PROCNODE_STEPS(1),
5842 & is(1), liw_passed,
5843 & id%STEP(1), bufr(1), lbufr, lbufr_bytes,
5844 & cwork(1), size(cwork),
5845 & lscal, pt_scaling(1), size(pt_scaling),
5846 & id%RHSCOMP(ibeg_rhscomp), ld_rhscomp, nbrhs_eff,
5847 & ptr_posinrhscomp_bwd(1), id%N,
5848 & perm_rhs, size(perm_rhs)) ! for sparse permuted RHS on host
5849 ENDIF
5850 DEALLOCATE( cwork )
#define mumps_abort
Definition VE_Metis.h:25
subroutine mumps_propinfo(icntl, info, comm, id)
subroutine cmumps_gather_solution(nslaves, n, myid, comm, nrhs, mtype, rhs, lrhs, ncol_rhs, jbeg_rhs, ptrist, keep, keep8, procnode_steps, iw, liw, step, buffer, size_buf, size_buf_bytes, cwork, lcwork, lscal, scaling, lscaling, rhscomp, lrhscomp, ncol_rhscomp, posinrhscomp, lpos_n, perm_rhs, size_perm_rhs)
Definition csol_c.F:1083
subroutine cmumps_scatter_rhs(nslaves, n, myid, comm, mtype, rhs, lrhs, ncol_rhs, nrhs, rhscomp, lrhscomp, ncol_rhscomp, posinrhscomp_fwd, nb_fs_in_rhscomp_f, ptrist, keep, keep8, procnode_steps, iw, liw, step, icntl, info)
Definition csol_c.F:1829
subroutine cmumps_sol_c(root, n, a, la, iw, liw, w, lwc, iwcb, liww, nrhs, na, lna, ne_steps, w2, mtype, icntl, from_pp, step, frere, dad, fils, ptrist, ptrfac, iw1, liw1, ptracb, liwk_ptracb, procnode_steps, slavef, info, keep, keep8, dkeep, comm_nodes, myid, myid_nodes, bufr, lbufr, lbufr_bytes, istep_to_iniv2, tab_pos_in_pere, ibeg_root_def, iend_root_def, iroot_def_rhs_col1, rhs_root, lrhs_root, size_root, master_root, rhscomp, lrhscomp, posinrhscomp_fwd, posinrhscomp_bwd, nz_rhs, nbcol_inbloc, nrhs_orig, jbeg_rhs, step2node, lstep2node, irhs_sparse, irhs_ptr, size_perm_rhs, perm_rhs, size_uns_perm_inv, uns_perm_inv, nb_fs_in_rhscomp_f, nb_fs_in_rhscomp_tot, do_nbsparse, rhs_bounds, lrhs_bounds, ipool_b_l0_omp, lpool_b_l0_omp, ipool_a_l0_omp, lpool_a_l0_omp, l_virt_l0_omp, virt_l0_omp, l_phys_l0_omp, phys_l0_omp, perm_l0_omp, ptr_leafs_l0_omp, l0_omp_mapping, ll0_omp_mapping, l0_omp_factors, ll0_omp_factors)
Definition csol_c.F:31
#define max(a, b)
Definition macros.h:21
subroutine mpi_bcast(buffer, cnt, datatype, root, comm, ierr)
Definition mpi.f:205

◆ cmumps_solve_driver()

subroutine cmumps_solve_driver ( type (cmumps_struc), target id)

Definition at line 14 of file csol_driver.F.

17C Lock Initialization (_LI) and Desruction (_LD)
20C
21C Purpose
22C =======
23C
24C Performs solution phase (solve), Iterative Refinements
25C and Error analysis.
26C
27C
28C
29C
30 USE cmumps_buf
31 USE cmumps_ooc
38!$ USE OMP_LIB
39 IMPLICIT NONE
40C -------------------
41C Explicit interfaces
42C -------------------
43 INTERFACE
44 SUBROUTINE cmumps_size_in_struct( id, NB_INT,NB_CMPLX,NB_CHAR )
46 TYPE (CMUMPS_STRUC) :: id
47 INTEGER(8) :: NB_INT,NB_CMPLX,NB_CHAR
48 END SUBROUTINE cmumps_size_in_struct
49 SUBROUTINE cmumps_check_dense_rhs
50 &(idrhs, idinfo, idn, idnrhs, idlrhs)
51 COMPLEX, DIMENSION(:), POINTER :: idRHS
52 INTEGER, intent(in) :: idN, idNRHS, idLRHS
53 INTEGER, intent(inout) :: idINFO(:)
54 END SUBROUTINE cmumps_check_dense_rhs
55 END INTERFACE
56C
57 include 'mpif.h'
58 include 'mumps_headers.h'
59 include 'mumps_tags.h'
60#if defined(V_T)
61 include 'VT.inc'
62#endif
63 INTEGER :: STATUS(MPI_STATUS_SIZE)
64 INTEGER :: IERR
65 INTEGER, PARAMETER :: MASTER = 0
66c
67C Parameters
68C ==========
69C
70 TYPE (CMUMPS_STRUC), TARGET :: id
71C
72C Local variables
73C ===============
74C
75 INTEGER MP,LP, MPG
76 LOGICAL PROK, PROKG, LPOK
77 INTEGER MTYPE, ICNTL21
78 LOGICAL LSCAL, POSTPros, GIVSOL
79 INTEGER ICNTL10, ICNTL11
80 INTEGER I,IPERM,K,JPERM, J, II, IZ2
81 INTEGER IZ, NZ_THIS_BLOCK, PJ
82C pointers in IS
83 INTEGER LIW
84C pointers in id%S
85 INTEGER(8) :: LA, LA_PASSED
86 INTEGER LIW_PASSED
87 INTEGER(8) :: LWCB8_MIN, LWCB8, LWCB8_SOL_C
88C buffer sizes
89 INTEGER CMUMPS_LBUF, CMUMPS_LBUF_INT
90 INTEGER(8) :: CMUMPS_LBUF_8
91 INTEGER :: LBUFR, LBUFR_BYTES
92 INTEGER :: MSG_MAX_BYTES_SOLVE, MSG_MAX_BYTES_GTHRSOL
93 INTEGER(8) :: MSG_MAX_BYTES_SOLVE8
94C reception buffer
95 INTEGER, ALLOCATABLE, DIMENSION(:) :: BUFR
96C null space
97 INTEGER IBEG_ROOT_DEF, IEND_ROOT_DEF,
98 & IBEG_GLOB_DEF, IEND_GLOB_DEF,
99 & IROOT_DEF_RHS_COL1
100C
101 INTEGER NITREF, NOITER, SOLVET, KASE
102C Meaningful only with tree pruning and sparse RHS
103 LOGICAL INTERLEAVE_PAR, DO_PERMUTE_RHS
104C true if CMUMPS_SOL_C called during postprocessing
105 LOGICAL FROM_PP
106C
107C TIMINGS
108 DOUBLE PRECISION TIMEIT, TIMEEA, TIMEEA1, TIMELCOND
109 DOUBLE PRECISION TIME3
110 DOUBLE PRECISION TIMEC1,TIMEC2
111 DOUBLE PRECISION TIMEGATHER1,TIMEGATHER2
112 DOUBLE PRECISION TIMESCATTER1,TIMESCATTER2
113 DOUBLE PRECISION TIMECOPYSCALE1,TIMECOPYSCALE2
114C ------------------------------------------
115C Declarations related to exploit sparsity
116C ------------------------------------------
117 INTEGER :: NRHS_NONEMPTY
118 INTEGER :: STRAT_PERMAM1
119 LOGICAL :: DO_NULL_PIV
120 INTEGER, DIMENSION(:), POINTER :: IRHS_PTR_COPY
121 INTEGER, DIMENSION(:), POINTER :: IRHS_SPARSE_COPY
122 COMPLEX, DIMENSION(:), POINTER :: RHS_SPARSE_COPY
123 LOGICAL IRHS_SPARSE_COPY_ALLOCATED, IRHS_PTR_COPY_ALLOCATED,
124 & RHS_SPARSE_COPY_ALLOCATED
125C
126 INTEGER, DIMENSION(:), ALLOCATABLE :: MAP_RHS_loc
127 INTEGER, DIMENSION(:), POINTER :: IRHS_loc_PTR
128 LOGICAL :: IRHS_loc_PTR_allocated
129 COMPLEX, DIMENSION(:), POINTER :: idRHS_loc
130 INTEGER(8) :: DIFF_SOL_loc_RHS_loc
131 INTEGER(8) :: RHS_loc_size, RHS_loc_shift
132 INTEGER(8) :: NBT
133 INTEGER :: NBCOL, COLSIZE, JBEG_RHS, JEND_RHS, JBEG_NEW,
134 & NBCOL_INBLOC, IPOS, IPOSRHSCOMP
135 INTEGER, DIMENSION(:), ALLOCATABLE :: PERM_RHS
136 INTEGER, DIMENSION(:), POINTER :: PTR_POSINRHSCOMP_FWD,
137 & PTR_POSINRHSCOMP_BWD
138 COMPLEX, DIMENSION(:), POINTER :: PTR_RHS
139 INTEGER :: SIZE_IPTR_WORKING, SIZE_WORKING
140C NRHS_NONEMPTY: holds
141C either the original number of RHS (id%NRHS defined on host)
142C or, when the RHS is sparse, it holds the
143C number of non empty columns.
144C it is computed on master and is
145C then broadcasted on all processes.
146C IRHS_PTR_COPY holds a compressed local copy of IRHS_PTR (or points
147C on the master to id%IRHS_PTR if no permutation requested)
148C IRHS_SPARSE_COPY might be allocated or might also point to
149C id%IRHS_SPARSE. To test if we can deallocate it we trace
150C with IRHS_SPARSE_COPY_ALLOCATED when it was effectively
151C allocated.
152C NBCOL_INBLOC total nb columns to process in this block
153C JBEG_RHS global ptr for starting column requested for this block
154C JEND_RHS global ptr for end column_number requested for this block
155C PERM_RHS -- Permutation of RHS computed on master and broadcasted
156C on all procs (of size id%NRHS orginal)
157C PERM_RHS(k) = i means that i is the kth column to be processed
158C Note that PERM_RHS will be used also in case of interleaving
159C ------------------------------------
160 INTEGER :: NOMP
161 COMPLEX ONE
162 COMPLEX ZERO
163 parameter( one = (1.0e0,0.0e0) )
164 parameter( zero = (0.0e0,0.0e0) )
165 REAL RZERO, RONE
166 parameter( rzero = 0.0e0, rone = 1.0e0 )
167C
168C RHS_IR is internal to CMUMPS and used for iterative refinement
169C or the error analysis section. It either points to the user's
170C RHS (on the host when the solution is centralized or the RHS
171C is dense), or is a workarray allocated inside this routine
172C of size N.
173 COMPLEX, DIMENSION(:), POINTER :: RHS_IR
174 COMPLEX, DIMENSION(:), POINTER :: WORK_WCB
175 COMPLEX, DIMENSION(:), POINTER :: PTR_RHS_ROOT
176 INTEGER(8) :: LPTR_RHS_ROOT
177C
178C Local workarrays that will be dynamically allocated
179C
180 COMPLEX, ALLOCATABLE :: SAVERHS(:), C_RW1(:),
181 & C_RW2(:),
182 & SRW3(:), C_Y(:),
183 & C_W(:)
184 INTEGER :: LCWORK
185 COMPLEX, ALLOCATABLE :: CWORK(:)
186 INTEGER, ALLOCATABLE :: MAP_RHS(:)
187 REAL, ALLOCATABLE :: R_Y(:), D(:)
188 REAL, ALLOCATABLE :: R_W(:)
189C The 2 following workarrays are temporary local
190C arrays only used for distributed matrix input
191C (KEEP(54) .NE. 0).
192 REAL, ALLOCATABLE, DIMENSION(:) :: R_LOCWK54
193 COMPLEX, ALLOCATABLE, DIMENSION(:) :: C_LOCWK54
194 INTEGER :: NBENT_RHSCOMP, NB_FS_RHSCOMP_F,
195 & NB_FS_RHSCOMP_TOT
196 INTEGER, DIMENSION(:), ALLOCATABLE :: UNS_PERM_INV
197 LOGICAL :: UNS_PERM_INV_NEEDED_INMAINLOOP,
198 & UNS_PERM_INV_NEEDED_BEFMAINLOOP
199 INTEGER LIWK_SOLVE, LIWCB
200 INTEGER, ALLOCATABLE :: IW1(:), IWK_SOLVE(:), IWCB(:)
201 INTEGER :: LIWK_PTRACB
202 INTEGER(8), ALLOCATABLE :: PTRACB(:)
203C
204C Parameters arising from the structure
205C
206 INTEGER(8) :: MAXS
207 REAL, DIMENSION(:), POINTER :: CNTL
208 INTEGER, DIMENSION (:), POINTER :: KEEP,ICNTL,INFO
209 INTEGER(8), DIMENSION (:), POINTER :: KEEP8
210 INTEGER, DIMENSION (:), POINTER :: IS
211 REAL, DIMENSION(:),POINTER:: RINFOG
212C ===============================================================
213C SCALING issues:
214C When scaling was performed
215C RHS holds the solution of the scaled system
216C The unscaled second member (b0) was given
217C then we have to scale both rhs adn solution:
218C A(sca) = LU = D1*A*D2 , with D2 = COLSCA
219C D1 = ROWSCA
220C --------------
221C CASE OF A X =B
222C --------------
223C (ICNTL(9)=1 or MTYPE=1)
224C A*x0 = b0
225C b(sca) = D1 * b0 = ROWSCA*S(ISTW3)
226C A(sca) [(D2) **(-1)] x0 = b(sca)
227C so the computed solution by Check y0 of LU *y0 = b(sca)
228C is : y0 =[(D2) **(-1)] x0 and so x0= D2*y0 is modified
229C --------------
230C CASE OF AT X =B
231C --------------
232C (ICNTL(9).NE.1 or MTYPE=0)
233C A(sca) = LU = D1*A*D2
234C AT*x0 = b0 => D2ATD1 D1-1 x0 = D2b0
235C b(sca) = D2 * b0 = COLSCA*S(ISTW3)
236C A(sca)T [(D1) **(-1)] x0 = b(sca)
237C so the computed solution by Check y0 of LU *y0 = b(sca)
238C is : y0 =[(D1) **(-1)] x0 and so x0= D1*y0 is modified
239C
240C In case of distributed RHS we need
241C scaling information on each processor
242C
243 type scaling_data_t
244 sequence
245 REAL, dimension(:), pointer :: SCALING
246 REAL, dimension(:), pointer :: SCALING_LOC
247 end type scaling_data_t
248 type (scaling_data_t) :: scaling_data_sol, scaling_data_dr
249C To scale on the fly during GATHER SOLUTION
250 REAL, DIMENSION(:), POINTER :: PT_SCALING
251 REAL, TARGET :: Dummy_SCAL(1)
252C
253C ==================== END OF SCALING related data ================
254C
255C Local variables
256C
257C Interval associated to the subblocks of RHS a node has to process
258 INTEGER, DIMENSION(:), ALLOCATABLE, TARGET :: RHS_BOUNDS
259 INTEGER :: LPTR_RHS_BOUNDS
260 INTEGER, DIMENSION(:), POINTER :: PTR_RHS_BOUNDS
261 LOGICAL :: DO_NBSPARSE, NBSPARSE_LOC
262 LOGICAL :: PRINT_MAXAVG
263 REAL ARRET
264 COMPLEX C_DUMMY(1)
265 REAL R_DUMMY(1)
266 INTEGER IDUMMY(1), JDUMMY(1), KDUMMY(1), LDUMMY(1), MDUMMY(1)
267 INTEGER, TARGET :: IDUMMY_TARGET(1)
268 COMPLEX, TARGET :: CDUMMY_TARGET(1)
269 INTEGER JJ
270 INTEGER allocok
271 INTEGER NBRHS, NBRHS_EFF, BEG_RHS, NB_RHSSKIPPED,
272 & LD_RHS,
273 & MASTER_ROOT, MASTER_ROOT_IN_COMM
274 INTEGER SIZE_ROOT, LD_REDRHS
275 INTEGER(8) :: IPT_RHS_ROOT
276 INTEGER(8) :: IBEG, IBEG_RHSCOMP, KDEC, IBEG_loc, IBEG_REDRHS
277 INTEGER LD_RHSCOMP, NCOL_RHS_loc
278 INTEGER LD_RHS_loc, JBEG_RHS_loc
279 INTEGER NB_K133, IRANK, TSIZE
280 INTEGER KMAX_246_247
281 INTEGER IFLAG_IR, IRStep
282 LOGICAL TESTConv
283 LOGICAL WORKSPACE_MINIMAL_PREFERRED, WK_USER_PROVIDED
284 INTEGER(8) NB_BYTES !size of data allocated during solve
285 INTEGER(8) NB_BYTES_MAX !MAX size of data allocated during solve
286 INTEGER(8) NB_BYTES_EXTRA !For Step2Node, which may be freed later
287 INTEGER(8) NB_BYTES_LOC !For temp. computations
288 INTEGER(8) NB_INT, NB_CMPLX, NB_CHAR, K34_8, K35_8
289 INTEGER(8) K16_8, ITMP8, NB_BYTES_ON_ENTRY
290#if defined(V_T)
291C Vampir
292 INTEGER soln_drive_class, glob_comm_ini, perm_scal_ini, soln_dist,
293 & soln_assem, perm_scal_post
294#endif
295 LOGICAL I_AM_SLAVE, BUILD_POSINRHSCOMP
296 LOGICAL :: BUILD_RHSMAPINFO
297 LOGICAL WORK_WCB_ALLOCATED, IS_INIT_OOC_DONE
298 LOGICAL :: IS_LR_MOD_TO_STRUC_DONE
299 INTEGER :: KEEP350_SAVE
300 LOGICAL STOP_AT_NEXT_EMPTY_COL
301 INTEGER MTYPE_LOC
302 INTEGER(4) :: I4
303 INTEGER MAT_ALLOC_LOC, MAT_ALLOC
304 INTEGER MUMPS_PROCNODE
305 EXTERNAL mumps_procnode
306 INTEGER(8) :: FILE_SIZE,STRUC_SIZE
307C
308C First executable statement
309C
310#if defined(V_T)
311 CALL vtclassdef( 'Soln driver',soln_drive_class,ierr)
312 CALL vtfuncdef( 'glob_comm_ini',soln_drive_class,
313 & glob_comm_ini,ierr)
314 CALL vtfuncdef( 'perm_scal_ini',soln_drive_class,
315 & perm_scal_ini,ierr)
316 CALL vtfuncdef( 'soln_dist',soln_drive_class,soln_dist,ierr)
317 CALL vtfuncdef( 'soln_assem',soln_drive_class,soln_assem,ierr)
318 CALL vtfuncdef( 'perm_scal_post',soln_drive_class,
319 & perm_scal_post,ierr)
320#endif
321C -- The following pointers xxCOPY might be allocated but then
322C -- the associated xxCOPY_ALLOCATED will be set to
323C -- enable deallocation
324 irhs_ptr_copy => idummy_target
325 irhs_ptr_copy_allocated = .false.
326 irhs_sparse_copy => idummy_target
327 irhs_sparse_copy_allocated=.false.
328 rhs_sparse_copy => cdummy_target
329 rhs_sparse_copy_allocated=.false.
330 NULLIFY(rhs_ir)
331 NULLIFY(work_wcb)
332 NULLIFY(scaling_data_dr%SCALING)
333 NULLIFY(scaling_data_dr%SCALING_LOC)
334 NULLIFY(scaling_data_sol%SCALING)
335 NULLIFY(scaling_data_sol%SCALING_LOC)
336 irhs_loc_ptr_allocated = .false.
337 is_init_ooc_done = .false.
338 is_lr_mod_to_struc_done = .false.
339 wk_user_provided = .false.
340 work_wcb_allocated = .false.
341 cntl =>id%CNTL
342 keep =>id%KEEP
343 keep8=>id%KEEP8
344 is =>id%IS
345 icntl=>id%ICNTL
346 info =>id%INFO
347C ASPK =>id%A
348C COLSCA =>id%COLSCA
349C ROWSCA =>id%ROWSCA
350 rinfog =>id%RINFOG
351 lp = icntl( 1 )
352 mp = icntl( 2 )
353 mpg = icntl( 3 )
354 lpok = ((lp.GT.0).AND.(id%ICNTL(4).GE.1))
355 prok = ((mp.GT.0).AND.(id%ICNTL(4).GE.2))
356 prokg = ( mpg .GT. 0 .and. id%MYID .eq. master )
357 prokg = (prokg.AND.(id%ICNTL(4).GE.2))
358 print_maxavg = .NOT.(id%NSLAVES.EQ.1 .AND. keep(46).EQ.1)
359 IF (.not.prok) mp =0
360 IF (.not.prokg) mpg=0
361 IF ( prok ) WRITE(mp,100)
362 IF ( prokg ) WRITE(mpg,100)
363 nb_bytes = 0_8
364 nb_bytes_max = 0_8
365 nb_bytes_extra = 0_8
366 k34_8 = int(keep(34), 8)
367 k35_8 = int(keep(35), 8)
368 k16_8 = int(keep(16), 8)
369 nbent_rhscomp = 0
370C Used by DISTRIBUTED_SOLUTION to skip empty columns
371C that are skipped (case of sparse RHS)
372 nb_rhsskipped = 0
373C next 4 initialisations needed in case of error
374C to free space allocated
375 lscal = .false.
376 work_wcb_allocated = .false.
377 icntl21 = -99998 ! will be bcasted later to slaves
378 ibeg_rhscomp =-152525_8 ! Should not be used
379 build_posinrhscomp = .true.
380 ibeg_glob_def = -9888 ! unitialized state
381 iend_glob_def = -9888 ! unitialized state
382 ibeg_root_def = -9777 ! unitialized state
383 iend_root_def = -9777 ! unitialized state
384 iroot_def_rhs_col1 = -9666 ! unitialized state
385C Not needed anymore (since new version of gather)
386C LD_RHSCOMP = max(KEEP(89),1) ! at the nb of pivots eliminated on
387 ! that proc
388 ld_rhscomp = 1
389 nb_fs_rhscomp_tot = keep(89)
390! number of FS var of the pruned tree
391! mapped on this proc
392 nb_fs_rhscomp_f = nb_fs_rhscomp_tot
393C Save value of KEEP(350), in case of LR solve
394C KEEP(350) may be overwritten and restored
395C Old unoptimized version before 5.0.2 not available anymore
396 IF (keep(350).LE.0) keep(350)=1
397 IF (keep(350).GT.2) keep(350)=1
398 keep350_save = keep(350)
399C
400C Depending on the type of parallelism,
401C the master can have the role of a slave
402 i_am_slave = ( id%MYID .ne. master .OR.
403 & ( id%MYID .eq. master .AND.
404 & keep(46) .eq. 1 ) )
405C
406C Compute the number of integers and nb of reals in the structure
407 CALL cmumps_size_in_struct (id, nb_int, nb_cmplx, nb_char)
408 nb_bytes = nb_bytes + nb_int * k34_8 + nb_cmplx * k35_8 + nb_char
409 nb_bytes_on_entry = nb_bytes !used to check alloc/dealloc count ok
411 nb_bytes_max = max(nb_bytes_max,nb_bytes)
412C ======================================
413C BEGIN CHECK KEEP ENTRIES AND INTERFACE
414C ======================================
415C The checks below used to be in CMUMPS_DRIVER. It is much better
416C to have them here in CMUMPS_SOL_DRIVER because this enables
417C more flexibility in the management of priorities between various
418C checks.
419 IF (id%MYID .EQ. master) THEN
420c subroutine only because called at facto and solve
421 CALL cmumps_set_k221(id)
422 id%KEEP(111) = id%ICNTL(25)
423C For the case of ICNTL(20)=1 one could
424C switch off exploit sparsity when RHS is too dense.
425 IF (id%ICNTL(20) .EQ. 1) id%KEEP(235) = -1 !automatic
426 IF (id%ICNTL(20) .EQ. 2) id%KEEP(235) = 0 !off
427 IF (id%ICNTL(20) .EQ. 3) id%KEEP(235) = 1 !on
428 IF (id%ICNTL(20).EQ.1 .or. id%ICNTL(20).EQ.2 .or.
429 & id%ICNTL(20).EQ.3) THEN
430 id%KEEP(248) = 1 !sparse RHS
431 ELSE IF (id%ICNTL(20).EQ.10 .OR. id%ICNTL(20).EQ.11) THEN
432 id%KEEP(248) = -1 ! dist. RHS
433 ELSE
434 id%KEEP(248) = 0 !dense RHS
435 ENDIF
436 icntl21 = id%ICNTL(21)
437 IF (icntl21 .ne.0.and.icntl21.ne.1) icntl21=0
438 IF ( id%ICNTL(30) .NE.0 ) THEN
439C A-1 is on
440 id%KEEP(237) = 1
441 ELSE
442C A-1 is off
443 id%KEEP(237) = 0
444 ENDIF
445 IF (id%KEEP(248) .eq.0.and. id%KEEP(237).ne.0) THEN
446C For A-1 we have a sparse RHS in the API.
447C Force KEEP(248) accordingly.
448 id%KEEP(248)=1
449 ENDIF
450 IF ((id%KEEP(221).EQ.2 ).AND.(id%KEEP(248).NE.0) ) THEN
451C -- input RHS is indeed stored in REDRHS and RHSCOMP
452 id%KEEP(248) = 0
453 ENDIF
454 IF ((id%KEEP(221).EQ.2 ).AND.(id%KEEP(235).NE.0) ) THEN
455C -- input RHS is in fact effectively
456C -- stored in REDRHS and RHSCOMP
457 id%KEEP(235) = 0
458 ENDIF
459 IF ( (id%KEEP(248).EQ.0).AND.(id%KEEP(111).EQ.0) ) THEN
460C RHS is not sparse and thus exploit sparsity is reset to 0
461 id%KEEP(235) = 0
462 ENDIF
463 IF (keep(248) .EQ. -1) THEN
464C V0 distributed RHS: no ES
465 id%KEEP(235) = 0
466 ENDIF
467C Case of Automatic setting of exploit sparsity (KEEP(235)=-1)
468C (in MUMPS_DRIVER original value of KEEP(235) is reset)
469 IF(id%KEEP(111).NE.0) id%KEEP(235)=0
470C
471 IF (id%KEEP(235).EQ.-1) THEN
472 IF (id%KEEP(237).NE.0) THEN
473C for A-1
474 id%KEEP(235)=1
475 ELSE
476 id%KEEP(235)=1
477 ENDIF
478 ELSE IF (id%KEEP(235).NE.0) THEN
479 id%KEEP(235)=1
480 ENDIF
481C Setting of KEEP(242) (permute RHS)
482 IF ((keep(111).NE.0)) THEN
483C In the context of null space, the null pivots
484C are by default permuted to post-order
485C However for null space there is in this case no need to
486C permute null pivots since they are already in correct order.
487C Setting KEEP(242)=1 would just force to go through
488C part of the code permuting to identity.
489C Apart for validation purposes this is not interesting
490C costly (and more risky).
491 keep(242) = 0
492 ENDIF
493 IF (keep(248).EQ.0.AND.keep(111).EQ.0) THEN
494C Permutation possible if sparse RHS
495C (KEEP(248).NE.0: A-1 or General Sparse)
496C or null space (even if in current version
497C it is deactived)
498 keep(242) = 0
499 ENDIF
500 IF ((keep(242).NE.0).AND.keep(237).EQ.0) THEN
501 IF ((keep(242).NE.-9).AND.keep(242).NE.1.AND.
502 & keep(242).NE.-1) THEN
503C Reset it to 0
504 keep(242) = 0
505 ENDIF
506 ENDIF
507 IF (keep(242).EQ.-9) THEN
508C {
509C Automatic setting of permute RHS
510 IF (id%KEEP(237).NE.0) THEN
511 keep(242) = 1 ! postorder for A-1
512 ELSE ! dense or general sparse or distributed rhs
513 keep(242) = 0 ! no permutation in most general case
514 IF (keep(248) .EQ. 1) THEN ! sparse RHS
515 IF (id%KEEP(235) .EQ. 1) THEN ! Tree pruning
516 IF (id%NRHS .GT. 1) THEN
517 IF (keep(497).EQ.-1 .OR. keep(497).GE.1) THEN
518 keep(242)=1
519 ENDIF
520 ENDIF
521 ENDIF
522 ENDIF
523 ENDIF
524C }
525 ENDIF
526 IF ( (id%KEEP(221).EQ.1 ).AND.(id%KEEP(235).NE.0) ) THEN
527C -- Do not permute RHS with REDRHS for the time being
528 id%KEEP(242) = 0
529 ENDIF
530 IF (keep(242).EQ.0) keep(243)=0 ! interleave off
531 IF ((keep(237).EQ.0).OR.(keep(242).EQ.0)) THEN
532C Interleave (243) possible only
533C when permute RHS (242) is on and with A-1
534 keep(243) = 0
535 ENDIF
536 IF (id%KEEP(237).EQ.1) THEN ! A-1 entries
537C Case of automatic setting of KEEP(243), KEEP(493-498)
538C (exploit sparsity parameters)
539 IF (id%NSLAVES.EQ.1) THEN
540 IF (id%KEEP(243).EQ.-1) id%KEEP(243)=0
541 IF (id%KEEP(495).EQ.-1) id%KEEP(495)=1
542 IF (id%KEEP(497).EQ.-1) id%KEEP(497)=1
543 ELSE
544 IF (id%KEEP(243).EQ.-1) id%KEEP(243)=1
545 IF (id%KEEP(495).EQ.-1) id%KEEP(495)=1
546 IF (id%KEEP(497).EQ.-1) id%KEEP(497)=1
547 ENDIF
548 ELSE ! dense or general sparse or distributed RHS
549 id%KEEP(243)=0
550 id%KEEP(495)=0
551 IF (keep(248) .EQ. 1) THEN ! sparse RHS
552 IF (id%KEEP(235) .EQ. 1) THEN ! Tree pruning
553 IF (id%NRHS .GT. 1) THEN
554 IF (id%KEEP(497).EQ.-1) id%KEEP(497)=1
555 ENDIF
556 ENDIF
557 ELSE
558C nbsparse meaningless for distributed or dense RHS
559C Force it to 0 whatever was the initial value
560 id%KEEP(497)=0
561 ENDIF
562 ENDIF
563 mtype = id%ICNTL( 9 )
564 IF (mtype.NE.1) mtype=0 ! see interface
565 IF ((mtype.EQ.0).AND.keep(50).NE.0) mtype =1
566! suppress option Atx=b for A-1
567 IF (id%KEEP(237).NE.0) mtype = 1
568C
569C ICNTL(35) was defined at analysis and
570C consistently reset at factorization
571C It was stored in KEEP(486) after factorization
572C Set KEEP(485) accordingly.
573C
574 IF (keep(486) .EQ. 2) THEN
575 keep(485) = 1 ! BLR solve
576 ELSE
577 keep(485) = 0 ! FR solve
578 ENDIF
579 ENDIF
580C Bcast id%KEEP(401) strategy (which
581C may be switched off or on during solve)
582 CALL mpi_bcast( id%KEEP(401), 1, mpi_integer, master, id%COMM,
583 & ierr )
584 CALL mpi_bcast(mtype,1,mpi_integer,master,
585 & id%COMM,ierr)
586 CALL mpi_bcast( id%KEEP(111), 1, mpi_integer, master, id%COMM,
587 & ierr )
588 CALL mpi_bcast( id%KEEP(221), 1, mpi_integer, master, id%COMM,
589 & ierr )
590 CALL mpi_bcast( id%KEEP(235), 1, mpi_integer, master, id%COMM,
591 & ierr )
592 CALL mpi_bcast( id%KEEP(237), 1, mpi_integer, master, id%COMM,
593 & ierr )
594 CALL mpi_bcast( id%KEEP(242), 2, mpi_integer, master, id%COMM,
595 & ierr )
596 CALL mpi_bcast( id%KEEP(248), 1, mpi_integer, master, id%COMM,
597 & ierr )
598 CALL mpi_bcast( id%KEEP(350), 1, mpi_integer, master, id%COMM,
599 & ierr )
600 CALL mpi_bcast( id%KEEP(485), 1, mpi_integer, master, id%COMM,
601 & ierr )
602 CALL mpi_bcast( id%KEEP(495), 3, mpi_integer, master, id%COMM,
603 & ierr )
604 CALL mpi_bcast( icntl21, 1, mpi_integer, master, id%COMM, ierr )
605C Broadcast original id%NRHS (used at least for checks on SOL_loc
606C and to allocate PERM_RHS in case of exploit sparsity)
607 CALL mpi_bcast( id%NRHS,1, mpi_integer, master, id%COMM,ierr)
608C
609C TIMINGS: reset to 0
610 timec2=0.0d0
611 timecopyscale2=0.0d0
612 timegather2=0.0d0
613 timescatter2=0.0d0
614 id%DKEEP(112)=0.0e0
615 id%DKEEP(113)=0.0e0
616C id%DKEEP(114) time for the iterative refinement
617C id%DKEEP(120) time for the error analysis
618C id%DKEEP(121) time for condition number
619C id%DKEEP(122) time for matrix redistribution (copy+scale solution)
620 id%DKEEP(114)=0.0e0
621 id%DKEEP(120)=0.0e0
622 id%DKEEP(121)=0.0e0
623 id%DKEEP(115)=0.0e0
624 id%DKEEP(116)=0.0e0
625 id%DKEEP(122)=0.0e0
626C Time for fwd, bwd and scalapack is
627C accumulated in DKEEP(117-119) within SOL_C
628C If requested time for each call to FWD/BWD
629C might be print but on output to solve
630C phase DKEEP will hold on each proc the accumulated time
631 id%DKEEP(117)=0.0e0
632 id%DKEEP(118)=0.0e0
633 id%DKEEP(119)=0.0e0
634 id%DKEEP(123)=0.0e0
635 id%DKEEP(124)=0.0e0
636 id%DKEEP(125)=0.0e0
637 id%DKEEP(126)=0.0e0
638 id%DKEEP(127)=0.0e0
639 id%DKEEP(128:134)=0.0e0
640 id%DKEEP(140:153)=0.0e0
641C
642 CALL mumps_secdeb(time3)
643C ------------------------------
644C Check parameters on the master
645C ------------------------------
646 IF ( id%MYID .EQ. master ) THEN
647 IF ((keep(23).NE.0).AND.keep(50).NE.0) THEN
648C Maximum transversal permutation
649C has not been saved (KEEP(23)>0 and UNS_PERM allocated)
650C when matrix is symmetric.
651 IF (prokg) WRITE(mpg,'(A)')
652 & ' Internal Error 1 in solution driver '
653 id%INFO(1)=-444
654 id%INFO(2)=keep(23)
655 ENDIF
656C ------------------------------------
657C Check that factors are available
658C either in-core or on disk, case
659C where factors were discarded during
660C factorization (e.g. useful to simulate
661C an OOC factorization or just get nb of
662C negative pivots or determinant)
663C ------------------------------------
664 IF (keep(201) .EQ. -1) THEN
665 IF (prokg) THEN
666 WRITE(mpg,'(A)')
667 & ' ERROR: Solve impossible because factors not kept'
668 ENDIF
669 id%INFO(1)=-44
670 id%INFO(2)=keep(251)
671 GOTO 333
672 ELSE IF (keep(221).EQ.0 .AND. keep(251) .EQ. 2
673 & .AND. keep(252).EQ.0) THEN
674 IF (prokg) THEN
675 WRITE(mpg,'(A)')
676 & ' ERROR: Solve impossible because factors not kept'
677 ENDIF
678 id%INFO(1)=-44
679 id%INFO(2)=keep(251)
680 GOTO 333
681 ENDIF
682C ------------------
683 IF (keep(252).NE.0 .AND. id%NRHS .NE. id%KEEP(253)) THEN
684C Fwd in facto
685C KEEP(252-253) available on all procs since analysis phase
686C Error: id%NRHS is not allowed to change since analysis
687C because fwd has been performed during facto with
688C KEEP(253) RHS
689 IF (prokg) THEN
690 WRITE(mpg,'(A)')
691 & ' ERROR: id%NRHS not allowed to change when',
692 & ' ICNTL(32)=1'
693 ENDIF
694 id%INFO(1)=-42
695 id%INFO(2)=id%KEEP(253)
696 GOTO 333
697 ENDIF
698C Testing MTYPE instead of ICNTL(9)
699 IF (keep(252).NE.0 .AND. mtype.NE.1) THEN
700C Fwd in facto is not compatible with transpose system
701 info(1) = -43
702 info(2) = 9
703 IF (prokg) THEN
704 WRITE(mpg,'(A)')
705 & .NE.' ERROR: Transpose system (ICNTL(9)0) not ',
706 & ' compatible with forward performed during',
707 & ' factorization (ICNTL(32)=1)'
708 ENDIF
709 GOTO 333
710 ENDIF
711 IF (keep(248) .NE. 0.AND.keep(252).NE.0) THEN
712C Fwd during facto incompatible with sparse RHS
713C Forbid sparse RHS when Fwd performed during facto
714C Sparse RHS may be due to A-1 (ICNTL(30)
715 info(1) = -43
716 IF (keep(237).NE.0) THEN
717 info(2) = 30 ! ICNTL(30)
718 IF (prokg) THEN
719 WRITE(mpg,'(A)')
720 & ' ERROR: A-1 functionality incompatible with',
721 & ' forward performed during factorization',
722 & ' (ICNTL(32)=1)'
723 ENDIF
724 ELSE
725 info(2) = 20 ! ICNTL(20)
726 IF (prokg) THEN
727 WRITE(mpg,'(A)')
728 & ' ERROR: sparse or dist. RHS incompatible with forward',
729 & ' elimination during factorization (ICNTL(32)=1)'
730 ENDIF
731 ENDIF
732 GOTO 333
733 ENDIF
734 IF (keep(237) .NE. 0 .AND. icntl21.NE.0) THEN
735 IF (prokg) THEN
736 WRITE(mpg,'(A)')
737 & ' ERROR: A-1 functionality is incompatible',
738 & ' with distributed solution.'
739 ENDIF
740 info(1)=-48
741 info(2)=21
742 GOTO 333
743 ENDIF
744 IF (keep(237) .NE. 0 .AND. keep(60) .NE.0) THEN
745 IF (prokg) THEN
746 WRITE(mpg,'(A)')
747 & ' ERROR: A-1 functionality is incompatible',
748 & ' with Schur.'
749 ENDIF
750 info(1)=-48
751 info(2)=19
752 GOTO 333
753 ENDIF
754 IF (keep(237) .NE. 0 .AND. keep(111) .NE.0) THEN
755 IF (prokg) THEN
756 WRITE(mpg,'(A)')
757 & ' ERROR: A-1 functionality is incompatible',
758 & ' with null space.'
759 ENDIF
760 info(1)=-48
761 info(2)=25
762 GOTO 333
763 ENDIF
764 IF (id%NRHS .LE. 0) THEN
765 id%INFO(1)=-45
766 id%INFO(2)=id%NRHS
767 IF ((id%KEEP(111).NE.0).AND.(id%INFOG(28).EQ.0)) THEN
768 IF (prokg) THEN
769 WRITE(mpg,'(A)')
770 & 'ICNTL(25) NE 0 but INFOG(28)=0',
771 & ' the matrix is not deficient'
772 ENDIF
773 ENDIF
774 GOTO 333
775 ENDIF
776C Entries of A-1 are stored in place of the input sparse RHS
777C thus no need for RHS to be allocated.
778 IF ( (id%KEEP(237).EQ.0) ) THEN
779 IF ((id%KEEP(248) == 0 .AND.keep(221).NE.2)
780 & .OR. icntl21==0) THEN
781C RHS must be of size N on the master either to
782C store the dense centralized RHS, either to store
783C the dense centralized solution.
785 & (id%RHS,id%INFO,id%N,id%NRHS,id%LRHS)
786 IF (id%INFO(1) .LT. 0) GOTO 333
787 ENDIF
788 ELSE
789C Check that the constraint NRHS=N is respected
790C Check for valid sparse RHS structure done
791 IF (id%NRHS .NE. id%N) THEN
792 id%INFO(1)=-47
793 id%INFO(2)=id%NRHS
794 GOTO 333
795 ENDIF
796 ENDIF
797 IF (id%KEEP(248) == 1) THEN
798C ------------------------------------
799C RHS_SPARSE, IRHS_SPARSE and IRHS_PTR
800C must be allocated of adequate size
801C ------------------------------------
802 IF (( id%NZ_RHS .LE.0 ).AND.(keep(237).NE.0)) THEN
803C At least one entry of A-1 must be requested
804 id%INFO(1)=-46
805 id%INFO(2)=id%NZ_RHS
806 GOTO 333
807 ENDIF
808 IF (( id%NZ_RHS .LE.0 ).AND.(keep(221).EQ.1)) THEN
809C At least one entry of RHS must be nonzero with
810c Schur reduced RHS option
811 id%INFO(1)=-46
812 id%INFO(2)=id%NZ_RHS
813 GOTO 333
814 ENDIF
815 IF ( id%NZ_RHS .GT. 0 ) THEN
816 IF ( .not. associated(id%RHS_SPARSE) )THEN
817 id%INFO(1)=-22
818 id%INFO(2)=10
819 GOTO 333
820 ENDIF
821 ENDIF
822 IF (id%NZ_RHS .GT. 0) THEN
823 IF ( .not. associated(id%IRHS_SPARSE) )THEN
824 id%INFO(1)=-22
825 id%INFO(2)=11
826 GOTO 333
827 ENDIF
828 ENDIF
829 IF ( .not. associated(id%IRHS_PTR) )THEN
830 id%INFO(1)=-22
831 id%INFO(2)=12
832 GOTO 333
833 ENDIF
834C
835 IF (size(id%IRHS_PTR) < id%NRHS + 1) THEN
836 id%INFO(1)=-22
837 id%INFO(2)=12
838 GOTO 333
839 END IF
840 IF (id%IRHS_PTR(id%NRHS + 1).ne.id%NZ_RHS+1) THEN
841 id%INFO(1)=-27
842 id%INFO(2)=id%IRHS_PTR(id%NRHS+1)
843 GOTO 333
844 END IF
845C compare with dble to prevent overflow
846 IF (dble(id%N)*dble(id%NRHS).LT.dble(id%NZ_RHS)) THEN
847C Possible in case of dupplicate entries in Sparse RHS
848 IF (prokg) THEN
849 write(mpg,*)
850 & " WARNING: many dupplicate entries in ",
851 & " sparse RHS provided by the user ",
852 & " id%NZ_RHS,id%N,id%NRHS =",
853 & id%NZ_RHS,id%N,id%NRHS
854 ENDIF
855 END IF
856 IF (id%IRHS_PTR(1).ne.1) THEN
857 id%INFO(1)=-28
858 id%INFO(2)=id%IRHS_PTR(1)
859 GOTO 333
860 END IF
861 IF (size(id%IRHS_SPARSE) < id%NZ_RHS) THEN
862 id%INFO(1)=-22
863 id%INFO(2)=11
864 GOTO 333
865 END IF
866 IF (size(id%RHS_SPARSE) < id%NZ_RHS) THEN
867 id%INFO(1)=-22
868 id%INFO(2)=10
869 GOTO 333
870 END IF
871 ENDIF
872C --------------------------------
873C Set null space options for solve
874C --------------------------------
875 CALL cmumps_get_ns_options_solve(icntl(1),keep(1),
876 & id%NRHS,
877 & mpg,info(1))
878 IF (info(1) .LT. 0) GOTO 333
879C
880 END IF ! MASTER
881C --------------------------------------
882C Check distributed solution vectors
883C --------------------------------------
884 IF (icntl21==1) THEN
885 IF ( i_am_slave ) THEN
886C (I)SOL_loc should be allocated to hold the
887C distributed solution on exit
888 IF ( id%LSOL_loc < id%KEEP(89) ) THEN
889 id%INFO(1)= -29
890 id%INFO(2)= id%LSOL_loc
891 GOTO 333
892 ENDIF
893 IF (id%KEEP(89) .NE. 0) THEN
894 IF ( .not. associated(id%ISOL_loc) )THEN
895 id%INFO(1)=-22
896 id%INFO(2)=13
897 GOTO 333
898 ENDIF
899 IF ( .not. associated(id%SOL_loc) )THEN
900 id%INFO(1)=-22
901 id%INFO(2)=14
902 GOTO 333
903 ENDIF
904 IF (size(id%ISOL_loc) < id%KEEP(89) ) THEN
905 id%INFO(1)=-22
906 id%INFO(2)=13
907 GOTO 333
908 END IF
909# if defined(MUMPS_F2003)
910 IF (size(id%SOL_loc,kind=8) <
911 & int(id%NRHS-1,8)*int(id%LSOL_loc,8)+
912 & int(id%KEEP(89),8)) THEN
913 id%INFO(1)=-22
914 id%INFO(2)=14
915 GOTO 333
916 END IF
917# else
918C Warning: size returns a standard INTEGER and could
919C overflow if id%SOL_loc was allocated of size > 2^31-1;
920C still we prefer to perform this test since only (1) very
921C large problems with large NRHS and small numbers of MPI
922C can result in such a situation; (2) the test could be
923C suppressed if needed but might be still be ok in case
924C the right-hand side overflows too.
925 IF (size(id%SOL_loc) <
926 & (id%NRHS-1)*id%LSOL_loc+id%KEEP(89)) THEN
927 id%INFO(1)=-22
928 id%INFO(2)=14
929 GOTO 333
930 END IF
931# endif
932 ENDIF
933 ENDIF
934 ENDIF
935 IF (id%MYID .NE. master) THEN
936 IF (id%KEEP(248) == 1) THEN
937C RHS should NOT be associated
938C if I am not master since it is
939C not even used to store the solution
940 IF ( associated( id%RHS ) ) THEN
941 id%INFO( 1 ) = -22
942 id%INFO( 2 ) = 7
943 GOTO 333
944 END IF
945 IF ( associated( id%RHS_SPARSE ) ) THEN
946 id%INFO( 1 ) = -22
947 id%INFO( 2 ) = 10
948 GOTO 333
949 END IF
950 IF ( associated( id%IRHS_SPARSE ) ) THEN
951 id%INFO( 1 ) = -22
952 id%INFO( 2 ) = 11
953 GOTO 333
954 END IF
955 IF ( associated( id%IRHS_PTR ) ) THEN
956 id%INFO( 1 ) = -22
957 id%INFO( 2 ) = 12
958 GOTO 333
959 END IF
960 END IF
961 ENDIF
962 IF (i_am_slave .AND. id%KEEP(248).EQ.-1) THEN
964 & id%Nloc_RHS,
965 & id%LRHS_loc,
966 & id%NRHS,
967 & id%IRHS_loc,
968 & id%RHS_loc,
969 & id%INFO)
970 IF (id%INFO(1) .LT. 0) GOTO 333
971 ENDIF
972C Prepare pointers to pass POINTERS(1) to
973C routines with implicit interfaces which
974C will then assume contiguous information
975C without needing to copy pointer arrays
976C in and out. Do this even if KEEP(248)
977C is different from -1 because of the
978C call to CMUMPS_DISTSOL_INDICES
979 IF (associated(id%IRHS_loc)) THEN
980 IF (size(id%IRHS_loc) .NE. 0) THEN
981 irhs_loc_ptr=>id%IRHS_loc
982 ELSE
983C so that IRHS_loc_PTR(1) is ok
984 irhs_loc_ptr=>idummy_target
985 ENDIF
986 ELSE
987 irhs_loc_ptr=>idummy_target
988 ENDIF
989 IF (associated(id%RHS_loc)) THEN
990 IF (size(id%RHS_loc) .NE. 0) THEN
991 idrhs_loc=>id%RHS_loc
992 ELSE
993 idrhs_loc=>cdummy_target
994 ENDIF
995 ELSE
996 idrhs_loc=>cdummy_target
997 ENDIF
998 IF (i_am_slave .AND. icntl21.EQ.1 .AND.
999 & keep(248) .EQ. -1) THEN ! Dist RHS and dist solution
1000 IF (associated(id%RHS_loc) .AND.
1001 & associated(id%SOL_loc)) THEN
1002 IF (id%KEEP(89).GT.0) THEN
1003C ----------------------------------------------------
1004C Check if RHS_loc and SOL_loc point to same object...
1005C id%SOL_loc(1) ok otherwise an error -22/14
1006C would have been raised earlier.
1007C idRHS_loc(1) may point to CDUMMY but is ok
1008C ----------------------------------------------------
1009 CALL mumps_size_c(idrhs_loc(1),id%SOL_loc(1),
1010 & diff_sol_loc_rhs_loc)
1011C ----------------------------------------
1012C Check for compatible dimensions in case
1013C SOL_loc and RHS_loc point to same memory
1014C ----------------------------------------
1015 IF (diff_sol_loc_rhs_loc .EQ. 0_8 .AND.
1016 & id%LSOL_loc .GT. id%LRHS_loc) THEN
1017C Note that, depending on the block size,
1018C if all columns are processed in one
1019C shot, this could still work. However,
1020C and since this was forbidden in the UG,
1021C we raise the error systematically
1022 id%INFO(1)=-56
1023 id%INFO(2)=id%LRHS_loc
1024 IF (lpok) THEN
1025 WRITE(lp,'(A,I9,A,I9)')
1026 &" ** Error RHS_loc and SOL_loc pointers match but LRHS_loc="
1027 &,id%LRHS_loc, " and LSOL_loc=", id%LSOL_loc
1028 ENDIF
1029 ENDIF
1030 ENDIF
1031 ENDIF
1032 ENDIF
1033 IF (id%MYID.EQ.master) THEN
1034C Do some checks (REDRHS), depending on KEEP(221)
1036 END IF ! MYID.EQ.MASTER
1037 IF (id%INFO(1) .LT. 0) GOTO 333
1038C -------------------------
1039C Propagate possible errors
1040C -------------------------
1041 333 CONTINUE
1042 CALL mumps_propinfo( id%ICNTL(1),
1043 & id%INFO(1),
1044 & id%COMM, id%MYID )
1045 IF ( id%INFO(1) .LT. 0 ) GO TO 90
1046C ====================================
1047C END CHECK INTERFACE AND KEEP ENTRIES
1048C ====================================
1049C ====================================
1050C Process case of NZ_RHS = 0 with
1051C sparse RHS and General Sparse (NOT A-1)
1052C -----------------------------------
1053 IF ((id%KEEP(248).EQ.1).AND.(id%KEEP(237).EQ.0)) THEN
1054C
1055 CALL mpi_bcast(id%NZ_RHS,1,mpi_integer,master,
1056 & id%COMM,ierr)
1057C
1058 IF (id%NZ_RHS.EQ.0) THEN
1059C We reset solution to zero and we return
1060C (first freeing working space at label 90)
1061 IF ((icntl21.EQ.1).AND.(i_am_slave)) THEN
1062C ----------------------
1063C SOL_loc reset to zero
1064C ----------------------
1065C ----------------------
1066C Prepare ISOL_loc array
1067C ----------------------
1068 liw_passed=max(1,keep(32))
1069C Only called if more than 1 pivot
1070C was eliminated by the processor.
1071C Note that LSOL_loc >= KEEP(89)
1072 IF (keep(89) .GT. 0) THEN
1073 CALL cmumps_distsol_indices( mtype, id%ISOL_loc(1),
1074 & id%PTLUST_S(1),
1075 & id%KEEP(1),id%KEEP8(1),
1076 & id%IS(1), liw_passed,id%MYID_NODES,
1077 & id%N, id%STEP(1), id%PROCNODE_STEPS(1),
1078 & id%NSLAVES, scaling_data_sol, lscal
1079C For checking only
1080 & , .false., idummy(1), 1
1081 & )
1082 DO j=1, id%NRHS
1083 DO i=1, keep(89)
1084 id%SOL_loc((j-1)*id%LSOL_loc + i) =zero
1085 ENDDO
1086 ENDDO
1087 ENDIF
1088 ENDIF
1089 IF (icntl21.NE.1) THEN ! centralized solution
1090C ----------------------------
1091C RHS reset to zero on master
1092C ----------------------------
1093 IF (id%MYID.EQ.master) THEN
1094 DO j=1, id%NRHS
1095 DO i=1, id%N
1096 id%RHS(int(j-1,8)*int(id%LRHS,8) + int(i,8)) =zero
1097 ENDDO
1098 ENDDO
1099 ENDIF
1100 ENDIF
1101C
1102C print solve phase stats if requested
1103 IF ( prokg ) THEN
1104C write(6,*) " NZ_RHS is zero "
1105 WRITE( mpg, 150 )
1106C ICNTL(35) should not been accessed during SOLVE thus
1107C print KEEP(486) value set during factorization
1108 & id%NRHS, icntl(27), icntl(9), icntl(10), icntl(11),
1109 & icntl(20), icntl(21), icntl(30), keep(486)
1110 IF (keep(221).NE.0) THEN
1111 WRITE (mpg, 152) keep(221)
1112 ENDIF
1113 IF (keep(252).GT.0) THEN ! Fwd during facto
1114 WRITE (mpg, 153) keep(252)
1115 ENDIF
1116 ENDIF
1117C
1118C --------
1119 GOTO 90 ! end of solve deallocate what is needed
1120C ====================================
1121C END CHECK INTERFACE AND KEEP ENTRIES
1122C ====================================
1123 ENDIF ! test NZ_RHS.EQ.0
1124C --------
1125 ENDIF ! (id%KEEP(248).EQ.1).AND.(id%KEEP(237).EQ.0)
1126 interleave_par =.false.
1127 do_permute_rhs =.false.
1128C
1129 IF ((id%KEEP(235).NE.0).or.(id%KEEP(237).NE.0)) THEN
1130C Case of pruned elimination tree or selected entries in A-1
1131 IF (id%KEEP(237).NE.0.AND.
1132 & id%KEEP(248).EQ.0) THEN
1133C When A-1 is requested (keep(237).ne.0)
1134C sparse RHS has been forced to be on.
1135 IF (lpok) THEN
1136 WRITE(lp,'(A,I4,I4)')
1137 & ' Internal Error 2 in solution driver (A-1) ',
1138 & id%KEEP(237), id%KEEP(248)
1139 ENDIF
1140 CALL mumps_abort()
1141 ENDIF
1142C NBT is inout in MUMPS_REALLOC and should be initialized.
1143 nbt = 0
1144C -- Allocate Step2node on each proc
1145 CALL mumps_realloc(id%Step2node, id%KEEP(28), id%INFO, lp,
1146 & force=.true.,
1147 & string='id%Step2node (Solve)', memcnt=nbt, errcode=-13)
1148 CALL mumps_propinfo( icntl(1), info(1),
1149 & id%COMM, id%MYID )
1150 IF ( info(1).LT.0 ) RETURN
1151C -- build Step2node on each proc;
1152C -- this is usefull to have at each step a unique
1153C -- representative node (associated with principal variable of
1154C -- that node.
1155 IF (nbt.NE.0) THEN
1156 ! Step2node was reallocated and needs be recomputed
1157 DO i=1, id%N
1158 IF (id%STEP(i).LE.0) cycle ! nonprincipal variables
1159 id%Step2node(id%STEP(i)) = i
1160 ENDDO
1161C ELSE
1162C we reuse Step2node computed in a previous solve phase
1163C Step2node is deallocated each time a new analysis is
1164C performed or when job=-2 is called
1165 ENDIF
1166 nb_bytes = nb_bytes + nbt*k34_8
1167 nb_bytes_max = max(nb_bytes_max,nb_bytes)
1168 nb_bytes_extra = nb_bytes_extra + nbt * k34_8
1169C Mapping information used during solve. In case of several
1170C facto+solve it has to be recomputed.
1171C In case of several solves with the same
1172C facto, it is not recomputed.
1173C It used to compute the interleaving
1174C for A-1, and, in dev_version, passed to sol_c to compute
1175C some stats
1176 IF((keep(235).NE.0).OR.(keep(237).NE.0)) THEN
1177 IF(.NOT.associated(id%IPTR_WORKING)) THEN
1179 END IF
1180 END IF
1181 ENDIF
1182C
1183C Initialize SIZE_OF_BLOCK from MUMPS_SOL_ES module
1184 IF ( i_am_slave )
1185 & CALL cmumps_sol_es_init(id%OOC_SIZE_OF_BLOCK, id%KEEP(201))
1186 do_null_piv = .true.
1187 nbcol_inbloc = -9998
1188 nz_this_block= -9998
1189 jbeg_rhs = -9998
1190c
1191 IF (id%MYID.EQ.master) THEN ! Compute NRHS_NONEMPTY
1192C
1193C -- Sparse RHS does
1194 IF ( keep(111)==0 .AND. keep(248)==1
1195 & ) THEN
1196C -- Note that KEEP(111).NE.0 (null space on)
1197C -- and KEEP(248).NE.0 will be made incompatible
1198C -- When computing entries of A-1 (or SparseRHS only)
1199 nrhs_nonempty = 0
1200 DO i=1, id%NRHS
1201 IF (id%IRHS_PTR(i).LT.id%IRHS_PTR(i+1))
1202 & nrhs_nonempty = nrhs_nonempty+1 !ith col in non empty
1203 ENDDO
1204 IF (nrhs_nonempty.LE.0) THEN
1205C Internal error: tested before in mumps_driver
1206 IF (lpok)
1207 & WRITE(lp,*) " Internal Error 3 in solution driver ",
1208 & " NRHS_NONEMPTY= ",
1209 & nrhs_nonempty
1210 CALL mumps_abort()
1211 ENDIF
1212 ELSE
1213 nrhs_nonempty = id%NRHS
1214 ENDIF
1215 ENDIF
1216C ------------------------------------
1217C If there is a special root node,
1218C precompute mapping of root's master
1219C ------------------------------------
1220 size_root = -33333
1221 IF ( keep( 38 ) .ne. 0 ) THEN
1222 master_root = mumps_procnode(
1223 & id%PROCNODE_STEPS(id%STEP( keep(38))),
1224 & keep(199) )
1225 IF (id%MYID_NODES .eq. master_root) THEN
1226 size_root = id%root%TOT_ROOT_SIZE
1227 ELSE IF ((id%MYID.EQ.master).AND.keep(60).NE.0) THEN
1228C SIZE_ROOT also used for KEEP(221).NE.0
1229 size_root=id%KEEP(116)
1230 ENDIF
1231 ELSE IF (keep( 20 ) .ne. 0 ) THEN
1232 master_root = mumps_procnode(
1233 & id%PROCNODE_STEPS(id%STEP(keep(20))),
1234 & keep(199) )
1235 IF (id%MYID_NODES .eq. master_root) THEN
1236 size_root = id%IS(
1237 & id%PTLUST_S(id%STEP(keep(20)))+keep(ixsz) + 3)
1238 ELSE IF ((id%MYID.EQ.master).AND.keep(60).NE.0) THEN
1239C SIZE_ROOT also used for KEEP(221).NE.0
1240 size_root=id%KEEP(116)
1241 ENDIF
1242 ELSE
1243 master_root = -44444
1244 END IF
1245C --------------
1246C Get block size
1247C --------------
1248C We work on a maximum of NBRHS at a time.
1249C The leading dimension of RHS is id%LRHS on the host process
1250C and it is set to N on slave processes.
1251 IF (id%MYID .eq. master) THEN
1252 keep(84) = icntl(27)
1253C Treating ICNTL(27)=0 as if ICNTL(27)=1
1254 IF(icntl(27).EQ.0) keep(84)=1
1255 IF (keep(252).NE.0) THEN
1256! Fwd in facto: all rhs (KEEP(253) need be processed in one pass
1257 nbrhs = keep(253)
1258 ELSE
1259 IF (keep(201) .EQ. 0 .OR. keep(84) .GT. 0) THEN
1260 nbrhs = abs(keep(84))
1261 ELSE
1262 nbrhs = -2*keep(84)
1263 END IF
1264 IF (nbrhs .GT. nrhs_nonempty ) nbrhs = nrhs_nonempty
1265C
1266 ENDIF
1267 ENDIF
1268#if defined(V_T)
1269 CALL vtbegin(glob_comm_ini,ierr)
1270#endif
1271C NRHS_NONEMPTY needed on all procs to allocate RHSCOMP on slaves
1272 CALL mpi_bcast(nrhs_nonempty,1,mpi_integer,master,
1273 & id%COMM,ierr)
1274 CALL mpi_bcast(nbrhs,1,mpi_integer,master,
1275 & id%COMM,ierr)
1276C
1277 IF (keep(201).GT.0) THEN
1278C --- id%KEEP(201) indicates if OOC is on (=1) of not (=0)
1279C -- 107: number of buffers
1280C Define number of types of files (L, possibly U)
1281 workspace_minimal_preferred = .false.
1282 IF (id%MYID .eq. master) THEN
1283 keep(107) = max(0,keep(107))
1284 IF ((keep(107).EQ.0).AND.
1285 & (keep(204).EQ.0).AND.(keep(211).NE.1) ) THEN
1286C -- default setting for release 4.8
1287 ! Case of
1288 ! -Emmergency buffer only and
1289 ! -Synchronous mode
1290 ! -NO_O_DIRECT (because of synchronous choice)
1291 ! THEN
1292 ! "Basic system-based version"
1293 ! We can force to allocate S to a minimal
1294 ! value.
1295 workspace_minimal_preferred=.true.
1296 ENDIF
1297 ENDIF
1298 CALL mpi_bcast( keep(107), 1, mpi_integer,
1299 & master, id%COMM, ierr )
1300 CALL mpi_bcast( keep(204), 1, mpi_integer,
1301 & master, id%COMM, ierr )
1302 CALL mpi_bcast( keep(208), 2, mpi_integer,
1303 & master, id%COMM, ierr )
1304 CALL mpi_bcast( workspace_minimal_preferred, 1,
1305 & mpi_logical,
1306 & master, id%COMM, ierr )
1307C --- end of OOC case
1308 ENDIF
1309 IF ( i_am_slave ) THEN
1310C
1311C NB_K133: Max number of simultaneously processed
1312C active fronts.
1313C Why more than one active node ?
1314C 1/ In parallel when we start a level 2 node
1315C then we do not know exactly when we will
1316C have received all contributions from the
1317C slaves.
1318C This is very critical in OOC since the
1319C size provided to the solve phase is
1320C much smaller and since we need
1321C to determine the size fo the buffers for IO.
1322C We pospone the allocation of the block NFRONT*NB_NRHS
1323C and solve the problem.
1324C
1325C
1326C 2/ While processing a node and sending information
1327C if we have not enough memory in send buffer
1328C then we must receive.
1329C We feel that this is not so critical.
1330C
1331 nb_k133 = 3
1332C
1333C To this we must add one time KEEP(133) to store
1334C the RHS of the root node if the root is local.
1335C Furthermore this quantity has to be multiplied by the
1336C blocking size in case of multiple RHS.
1337C
1338 IF ( keep( 38 ) .NE. 0 .OR. keep( 20 ) .NE. 0 ) THEN
1339 IF ( master_root .eq. id%MYID_NODES ) THEN
1340 IF (
1341 & .NOT. associated(id%root%RHS_CNTR_MASTER_ROOT)
1342 & ) THEN
1343 nb_k133 = nb_k133 + 1
1344 ENDIF
1345 END IF
1346 ENDIF
1347 lwcb8_min = int(nb_k133,8)*int(keep(133),8)*int(nbrhs,8)
1348C
1349C ---------------------------------------------------------------
1350C Set WK_USER_PROVIDED to true when workspace WK_USER is provided
1351C by user
1352C We can accept WK_USER to be provided on only one proc and
1353C different values of WK_USER per processor. Note that we are
1354C inside a block "IF (I_AM_SLAVE)"
1355 wk_user_provided = (id%LWK_USER.NE.0)
1356 IF (id%LWK_USER.EQ.0) THEN
1357 itmp8 = 0_8
1358 ELSE IF (id%LWK_USER.GT.0) THEN
1359 itmp8= int(id%LWK_USER,8)
1360 ELSE
1361 itmp8 = -int(id%LWK_USER,8)* 1000000_8
1362 ENDIF
1363C Incore: Check if the provided size is equal to that used during
1364C facto (case of ITMP8/=0 and KEEP8(24)/=ITMP8)
1365C But also check case of space not provided during solve
1366C but was provided during facto
1367C (case of ITMP8=0 and KEEP8(24)/=0)
1368 IF (keep(201).EQ.0) THEN ! incore
1369C Compare provided size with previous size
1370 IF (itmp8.NE.keep8(24)) THEN
1371C -- error when reusing space allocated
1372 info(1) = -41
1373 info(2) = id%LWK_USER
1374 GOTO 99 ! jump to propinfo
1375 ! (S is used in between and not allocated)
1376 ! NO COMM must occur then before next propinfo
1377 ! it happens in Mila's code but only with
1378 ! KEEP(209) > 0
1379 ENDIF
1380 ELSE
1381 keep8(24)=itmp8
1382 ENDIF
1383C KEEP8(24) holds the size of WK_USER provided by user.
1384C
1385 maxs = 0_8
1386 IF (wk_user_provided) THEN
1387 maxs = keep8(24)
1388 IF (maxs.LT. keep8(20)) THEN
1389 info(1)= -11
1390 ! MAXS should be increased by at least ITMP8
1391 itmp8 = keep8(20)+1_8-maxs
1392 CALL mumps_set_ierror(itmp8, info(2))
1393 ENDIF
1394 IF (info(1) .GE. 0 ) id%S => id%WK_USER(1:keep8(24))
1395 ELSE IF (associated(id%S)) THEN
1396C Avoid the use of "size(id%S)" because it returns
1397C a default integer that may overflow. Also "size(id%S,kind=8)"
1398C will only be available with Fortran 2003 compilers.
1399 maxs = keep8(23)
1400 ELSE
1401 ! S not allocated and WK_USER not provided ==> must be in OOC
1402 IF (keep(201).EQ.0) THEN ! incore
1403 WRITE(*,*) ' Working array S not allocated ',
1404 & ' on entry to solve phase (in core) '
1405 CALL mumps_abort()
1406 ELSE
1407C -- OOC and WK_USER not provided:
1408C define size (S) and allocate it
1409C ---- modify size of MAXS: in a simple
1410C ---- system-based version, we want to
1411C ---- use a small size for MAXS, to
1412C ---- avoid the system pagecache to be
1413C ---- polluted by 'our memory'
1414C
1415 IF ( keep(209).EQ.-1 .AND. workspace_minimal_preferred)
1416 & THEN
1417C We need space to load at least the largest factor
1418 maxs = keep8(20) + 1_8
1419 ELSE IF ( keep(209) .GE.0 ) THEN
1420C Use suggested value of MAXS provided in KEEP(209)
1421 maxs = max(int(keep(209),8), keep8(20) + 1_8)
1422 ELSE
1423 maxs = id%KEEP8(14) ! initial value: do not use more than
1424 ! minimum (non relaxed) size of OOC facto
1425 ENDIF
1426C
1427 maxs = max(maxs, id%KEEP8(20)+1_8)
1428 ALLOCATE (id%S(maxs), stat = allocok)
1429 keep8(23)=maxs
1430 IF ( allocok .GT. 0 ) THEN
1431 IF (lpok) THEN
1432 WRITE(lp,*) id%MYID,': problem allocation of S ',
1433 & 'at solve'
1434 ENDIF
1435 info(1) = -13
1436 CALL mumps_set_ierror(maxs, info(2))
1437 NULLIFY(id%S)
1438 keep8(23)=0_8
1439 ENDIF
1440 nb_bytes = nb_bytes + keep8(23) * k35_8
1441 nb_bytes_max = max(nb_bytes_max,nb_bytes)
1442C --- end of OOC case
1443 ENDIF
1444C -- end of id%S already associated
1445 ENDIF
1446C
1447C On the slaves, S is divided as follows:
1448C S(1..LA) holds the factors,
1449C S(LA+1..MAXS) is free workspace
1450 IF(keep(201).EQ.0)THEN
1451 la = keep8(31)
1452 ELSE
1453C MAXS has normally be dimensionned to store only factors.
1454 la = maxs
1455 IF(maxs.GT.keep8(31)+keep8(20)*int(keep(107)+1,8))THEN
1456C If we have a very large MAXS, the size reserved for
1457C loading the factors into memory does not need to exceed the
1458C total size of factors. The (KEEP8(20)*(KEEP(107)+1)) term
1459C is here in order to ensure that even with round-off
1460C problems (linked to the number of solve zones) factors can
1461C all be stored in-core
1462 la=keep8(31)+keep8(20)*int(keep(107)+1,8)
1463 ENDIF
1464 ENDIF
1465C
1466C We need to allocate a workspace of size LWCB8 for the solve phase.
1467C Either it is available at the end of MAXS, or we perform a
1468C dynamic allocation.
1469 IF ( maxs-la .GT. lwcb8_min ) THEN
1470 lwcb8 = maxs - la
1471 work_wcb => id%S(la+1_8:la+lwcb8)
1472 work_wcb_allocated=.false.
1473 ELSE
1474 lwcb8 = lwcb8_min
1475 ALLOCATE(work_wcb(lwcb8), stat = allocok)
1476 IF (allocok < 0 ) THEN
1477 info(1)=-13
1478 CALL mumps_set_ierror(lwcb8,info(2))
1479 ENDIF
1480 work_wcb_allocated=.true.
1481 nb_bytes = nb_bytes + lwcb8*k35_8
1482 nb_bytes_max = max(nb_bytes_max,nb_bytes)
1483 ENDIF
1484 ENDIF ! I_AM_SLAVE
1485C -----------------------------------
1486 99 CONTINUE
1487 CALL mumps_propinfo( icntl(1), info(1),
1488 & id%COMM,id%MYID)
1489 IF (info(1) < 0) GOTO 90
1490C -----------------------------------
1491 IF ( i_am_slave ) THEN
1492 IF (keep(201).GT.0) THEN
1494C -- This includes thread creation
1495C -- for asynchronous strategies
1497 is_init_ooc_done = .true.
1498 ENDIF ! KEEP(201).GT.0
1499 ENDIF
1500C
1501 CALL mumps_propinfo( icntl(1), info(1),
1502 & id%COMM,id%MYID)
1503 IF (info(1) < 0) GOTO 90
1504C
1505 IF (i_am_slave) THEN
1506 IF (keep(485).EQ.1) THEN
1507 IF (.NOT. (associated(id%FDM_F_ENCODING))) THEN
1508 WRITE(*,*) "Internal error 18 in CMUMPS_SOL_DRIVER"
1509 CALL mumps_abort()
1510 ENDIF
1511 IF (.NOT. (associated(id%BLRARRAY_ENCODING))) THEN
1512 WRITE(*,*) "Internal error 19 in CMUMPS_SOL_DRIVER"
1513 CALL mumps_abort()
1514 ENDIF
1515C Access to OOC data in module during solve
1516 CALL mumps_fdm_struc_to_mod('F',id%FDM_F_ENCODING)
1517 CALL cmumps_blr_struc_to_mod(id%BLRARRAY_ENCODING)
1518 is_lr_mod_to_struc_done = .true.
1519 ENDIF
1520 ENDIF
1521 IF (id%MYID.EQ.master) THEN
1522 IF ( prokg ) THEN
1523 WRITE( mpg, 150 )
1524C ICNTL(35) should not been accessed during SOLVE thus
1525C print KEEP(486) value set during factorization
1526 & id%NRHS, nbrhs, icntl(9), icntl(10), icntl(11),
1527 & icntl(20), icntl(21), icntl(30), keep(486)
1528 IF (keep(111).NE.0) THEN
1529 WRITE (mpg, 151) keep(111)
1530 ENDIF
1531 IF (keep(221).NE.0) THEN
1532 WRITE (mpg, 152) keep(221)
1533 ENDIF
1534 IF (keep(252).GT.0) THEN ! Fwd during facto
1535 WRITE (mpg, 153) keep(252)
1536 ENDIF
1537 ENDIF
1538C
1539C ====================================
1540C Define LSCAL, ICNTL10 and ICNTL11
1541C ====================================
1542C
1543 lscal = (((keep(52) .GT. 0) .AND. (keep(52) .LE. 8)) .OR. (
1544 & keep(52) .EQ. -1) .OR. keep(52) .EQ. -2)
1545 icntl10 = icntl(10)
1546 icntl11 = icntl(11)
1547C Values of ICNTL(11) out of range
1548 IF ((icntl11 .LT. 0).OR.(icntl11 .GE. 3)) THEN
1549 icntl11 = 0
1550 IF (prokg) WRITE(mpg,'(A)')
1551 & ' WARNING: ICNTL(11) out of range'
1552 ENDIF
1553 postpros = .false.
1554 IF (icntl11.NE.0 .OR. icntl10.NE.0) THEN
1555 postpros = .true.
1556C FORBID ERROR ANALYSIS AND ITERATIVE REFINEMENT
1557C if there are options that are not compatible
1558 IF (keep(111).NE.0) THEN
1559C IF WE RETURN A NULL SPACE BASIS or compute entries in A-1
1560C of Fwd in facto
1561C -When only one columns of A-1 is requested then
1562C we could try to reactivate IR even if
1563C -code need be updated
1564C -accuracy could be # when one or more columns are requested
1565 IF (prokg) WRITE(mpg,'(A,A)')
1566 & ' WARNING: Incompatible features: null space basis ',
1567 & ' and Iter. Ref and/or Err. Anal.'
1568 postpros = .false.
1569 ELSE IF ( keep(237) .NE.0 ) THEN
1570 IF (prokg) WRITE(mpg,'(A,A)')
1571 & ' WARNING: Incompatible features: AM1',
1572 & ' and Iter. Ref and/or Err. Anal.'
1573 postpros = .false.
1574 ELSE IF ( keep(252) .NE.0 ) THEN
1575 IF (prokg) WRITE(mpg,'(A,A)')
1576 & ' WARNING: Incompatible features: Fwd in facto ',
1577 & ' and Iter. Ref and/or Err. Anal.'
1578 postpros = .false.
1579 ELSE IF (keep(221).NE.0) THEN
1580C Forbid error analysis and iterative refinement
1581C in case of reduced rhs/solution
1582 IF (prokg) WRITE(mpg,'(A,A)')
1583 & ' WARNING: Incompatible features: reduced RHS ',
1584 & ' and Iter. Ref and/or Err. Anal.'
1585 postpros = .false.
1586 ELSE IF (nbrhs.GT. 1 .OR. icntl(21) .GT. 0) THEN
1587C Forbid error analysis and iterative refinement if
1588C the solution is distributed or
1589C in the case where nrhs > 1
1590 IF (prokg) WRITE(mpg,'(A,A)')
1591 & ' WARNING: Incompatible features: nrhs>1 or distrib sol',
1592 & ' and Iter. Ref and/or Err. Anal.'
1593 postpros = .false.
1594 ELSE IF ( keep(248) .EQ. -1 ) THEN
1595C Forbid error analysis and iterative refinement
1596C in case of distributed RHS
1597 IF (prokg) WRITE(mpg,'(A,A)')
1598 & ' WARNING: Incompatible features: distrib rhs',
1599 & ' and Iter. Ref and/or Err. Anal.'
1600 postpros = .false.
1601 ENDIF
1602 IF (.NOT.postpros) THEN
1603 icntl11 = 0
1604 icntl10 = 0
1605 ENDIF
1606 ENDIF
1607C Write a warning.
1608 IF ((icntl(10) .NE. 0) .AND. (icntl10 .EQ. 0)) THEN
1609 IF (prokg) WRITE(mpg,'(A)')
1610 & ' WARNING: ICNTL(10) treated as if set to 0 '
1611 ENDIF
1612 IF ((icntl(11) .NE. 0)
1613 & .AND.(icntl11 .EQ. 0)) THEN
1614 IF (prokg) WRITE(mpg,'(A)')
1615 & ' WARNING: ICNTL(11) treated as if set to 0 '
1616 ENDIF
1617C -- end of test master
1618 END IF
1619 CALL mpi_bcast(postpros,1,mpi_logical,master,
1620 & id%COMM,ierr)
1621C We need the original matrix only in the case of
1622C we want to perform IR or Error Analysis, i.e. if
1623C POSTPros = TRUE
1624 mat_alloc_loc = 0
1625 IF ( postpros ) THEN
1626 mat_alloc_loc = 1
1627C Check if the original matrix has been allocated.
1628 IF ( keep(54) .EQ. 0 ) THEN
1629C The original matrix is centralized
1630 IF ( id%MYID .eq. master ) THEN
1631 IF (keep(55).eq.0) THEN
1632C Case of matrix assembled centralized
1633 IF (.NOT.associated(id%A) .OR.
1634 & (.NOT.associated(id%IRN)) .OR.
1635 & ( .NOT.associated(id%JCN))) THEN
1636 IF (prokg) WRITE(mpg,'(A)')
1637 & ' WARNING: original centralized assembled',
1638 & ' matrix is not allocated '
1639 mat_alloc_loc = 0
1640 ENDIF
1641 ELSE
1642C Case of matrix in elemental format
1643 IF (.NOT.associated(id%A_ELT).OR.
1644 & .NOT.associated(id%ELTPTR).OR.
1645 & .NOT.associated(id%ELTVAR)) THEN
1646 IF (prokg) WRITE(mpg,'(A)')
1647 & ' WARNING: original elemental matrix is not allocated '
1648 mat_alloc_loc = 0
1649 ENDIF
1650 ENDIF
1651 ENDIF !end master, centralized matrix
1652 ELSE
1653C The original matrix is assembled distributed
1654 IF ( i_am_slave .AND. (id%KEEP8(29) .GT. 0_8) ) THEN
1655C If MAT_ALLOC_LOC = 1 the local distributed matrix is
1656C allocated, otherwise MAT_ALLOC_LOC = 0
1657 IF ((.NOT.associated(id%A_loc)) .OR.
1658 & (.NOT.associated(id%IRN_loc)) .OR.
1659 & (.NOT.associated(id%JCN_loc))) THEN
1660 IF (prokg) WRITE(mpg,'(A)')
1661 & ' WARNING: original distributed assembled',
1662 & ' matrix is not allocated '
1663 mat_alloc_loc = 0
1664 ENDIF
1665 ENDIF
1666 ENDIF ! end test allocation matrix (keep(54))
1667 ENDIF ! POSTPros
1668 CALL mpi_reduce( mat_alloc_loc, mat_alloc, 1,
1669 & mpi_integer,
1670 & mpi_min, master, id%COMM, ierr)
1671 IF ( id%MYID .eq. master ) THEN
1672 IF (mat_alloc.EQ.0) THEN
1673 postpros = .false.
1674 icntl11 = 0
1675 icntl10 = 0
1676C Write a warning.
1677 IF ((icntl(10) .NE. 0) .AND. (icntl10 .EQ. 0)) THEN
1678 IF (prokg) WRITE(mpg,'(A)')
1679 & ' WARNING: ICNTL(10) treated as if set to 0 '
1680 ENDIF
1681 IF ((icntl(11) .EQ. 1).OR.(icntl(11) .EQ. 2)
1682 & .AND.(icntl11 .EQ. 0)) THEN
1683 IF (prokg) WRITE(mpg,'(A)')
1684 & ' WARNING: ICNTL(11) treated as if set to 0 '
1685 ENDIF
1686 ENDIF
1687 IF (postpros) THEN
1688 ALLOCATE(saverhs(id%N*nbrhs),stat = allocok)
1689 IF ( allocok .GT. 0 ) THEN
1690 IF (lpok) THEN
1691 WRITE(lp,*) id%MYID,
1692 & ':Problem in solve: error allocating SAVERHS'
1693 ENDIF
1694 info(1) = -13
1695 info(2) = id%N*nbrhs
1696 END IF
1697 nb_bytes = nb_bytes + int(size(saverhs),8)*k35_8
1698 nb_bytes_max = max(nb_bytes_max,nb_bytes)
1699 ENDIF
1700C
1701C Forbid entries in a-1, in case of null space computations
1702c
1703 IF (keep(237).NE.0 .AND.keep(111).NE.0) THEN
1704C Ignore ENTRIES IN A-1 in case we compute
1705C vectors of the null space (KEEP(111)).NE.0.)
1706C We should still allocate IRHS_SPARSE
1707 IF (prokg) WRITE(mpg,'(A)')
1708 & ' WARNING: KEEP(237) treated as if set to 0 (null space)'
1709 keep(237)=0
1710 ENDIF
1711C -- end of test master
1712 END IF
1713 CALL mumps_propinfo( icntl(1), info(1),
1714 & id%COMM,id%MYID)
1715 IF (info(1) .LT.0 ) GOTO 90
1716C --------------------------------------------------
1717C Broadcast information to have all processes do the
1718C same thing (error analysis/iterative refinements/
1719C scaling/distribution of solution)
1720C --------------------------------------------------
1721 CALL mpi_bcast(icntl10,1,mpi_integer,master,
1722 & id%COMM,ierr)
1723 CALL mpi_bcast(icntl11,1,mpi_integer,master,
1724 & id%COMM,ierr)
1725 CALL mpi_bcast(icntl21,1,mpi_integer,master,
1726 & id%COMM,ierr)
1727 CALL mpi_bcast(postpros,1,mpi_logical,master,
1728 & id%COMM,ierr)
1729 CALL mpi_bcast(lscal,1,mpi_logical,master,
1730 & id%COMM,ierr)
1731 CALL mpi_bcast(keep(237),1,mpi_integer,master,
1732 & id%COMM,ierr)
1733C KEEP(248)==1 if not_NullSpace (KEEP(111)=0)
1734C and sparse RHS on input (id%ICNTL(20)/KEEP(248)==1)
1735C (KEEP(248)==1 implies KEEP(111) = 0, otherwise error was raised)
1736C We cant thus isolate the case of
1737C sparse RHS associated to Null space computation because
1738C in this case preparation is different since
1739C -we skip the forward step and
1740C -the pattern of the RHS
1741C of the bwd is related to null pivot indices found and not
1742C to information contained in the sparse rhs input format.
1743 do_permute_rhs = (keep(242).NE.0)
1744C apply interleaving in parallel (FOR A-1 or Null space only)
1745 IF ( (id%NSLAVES.GT.1) .AND. (keep(243).NE.0)
1746 & ) THEN
1747C -- Option to interleave RHS only makes sense when
1748C -- A-1 option is on or Null space compution are on
1749C (note also that KEEP(243).NE.0 only when PERMUTE_RHS is on)
1750 IF ((keep(237).NE.0).or.(keep(111).GT.0)) THEN
1751 interleave_par= .true.
1752 ELSE
1753 IF (prokg) THEN
1754 write(mpg,*) ' Warning incompatible options ',
1755 & ' interleave RHS reset to false '
1756 ENDIF
1757 ENDIF
1758 ENDIF
1759C --------------------------------------
1760C Compute an upperbound of message size
1761C for forward and backward solutions:
1762C --------------------------------------
1763 msg_max_bytes_solve8 = int(( 4 + keep(133) ) * keep(34),8) +
1764 & int(keep(133)*keep(35),8) * int(nbrhs,8)
1765 & + int(16*keep(34),8) ! for request id, pointer to next + safety
1766C Note that
1767 IF ( msg_max_bytes_solve8 .GT.
1768 & int(huge(i4),8)) THEN
1769 info(1) = -18
1770 info(2) = ( huge(i4) -
1771 & ( 16 + 4 + keep(133) ) ) /
1772 & ( keep(133) * keep(35) )
1773 ENDIF
1774 IF (info(1) .LT.0 ) GOTO 111
1775 msg_max_bytes_solve = int(msg_max_bytes_solve8)
1776C ------------------------------------------
1777C Compute an upperbound of message size
1778C for CMUMPS_GATHER_SOLUTION. Except
1779C possibly on the non working host, it
1780C should be smaller than MSG_MAX_BYTES_SOLVE
1781C ------------------------------------------
1782 IF (keep(237).EQ.0) THEN
1783C Note that for CMUMPS_GATHER_SOLUTION LBUFR buffer should
1784C be larger that MAX_inode(NPIV))*NBRHS + NPIV
1785C which is covered by next formula since KMAX_246_247 is larger
1786C than MAX_inode(NPIV))
1787C 2 integers packed (npiv and termination)
1788C Note that MSG_MAX_BYTES_GTHRSOL < MSG_MAX_BYTES_SOLVE
1789C so that it should not overflow
1790 kmax_246_247 = max(keep(246),keep(247))
1791 msg_max_bytes_gthrsol = ( 2 + kmax_246_247 ) * keep(34) +
1792 & kmax_246_247 * nbrhs * keep(35)
1793 ELSE IF (icntl21.EQ.0) THEN
1794C Each message from a slave is of size max 4:
1795C 2 integers : I,J
1796C 1 complex : (Aij)-1
1797C 1 terminaison
1798 msg_max_bytes_gthrsol = ( 3 * keep(34) + keep(35) )
1799 ELSE
1800C Not needed in case of distributed solution and A-1
1801C because the entries of A −1 are
1802C returned in RHS SPARSE on the host.
1803 msg_max_bytes_gthrsol = 0
1804 ENDIF
1805C The buffer is used both for solve and for CMUMPS_GATHER_SOLUTION
1806 lbufr_bytes = max(msg_max_bytes_solve, msg_max_bytes_gthrsol)
1807 tsize = int(min(100_8*int(msg_max_bytes_gthrsol,8),
1808 & 10000000_8))
1809 lbufr_bytes = max(lbufr_bytes,tsize)
1810 lbufr = ( lbufr_bytes + keep(34) - 1 ) / keep(34)
1811 ALLOCATE (bufr(lbufr),stat=allocok)
1812 IF ( allocok .GT. 0 ) THEN
1813 IF (lpok) THEN
1814 WRITE(lp,*) id%MYID,
1815 & ' Problem in solve: error allocating BUFR'
1816 ENDIF
1817 info(1) = -13
1818 info(2) = lbufr
1819 GOTO 111
1820 ENDIF
1821 nb_bytes = nb_bytes + int(size(bufr),8)*k34_8
1822 nb_bytes_max = max(nb_bytes_max,nb_bytes)
1823 IF ( i_am_slave .AND. id%NSLAVES .GT. 1 ) THEN
1824C ------------------------------------------------------
1825C Dimension send buffer for small integers, e.g. TRACINE
1826C ------------------------------------------------------
1827 cmumps_lbuf_int = ( 20 + id%NSLAVES * id%NSLAVES * 4 )
1828 & * keep(34)
1829 CALL cmumps_buf_alloc_small_buf( cmumps_lbuf_int, ierr )
1830 IF ( ierr .NE. 0 ) THEN
1831 info(1) = -13
1832 info(2) = cmumps_lbuf_int
1833 IF ( lpok) THEN
1834 WRITE(lp,*) id%MYID,
1835 & ':Error allocating small Send buffer:IERR=',ierr
1836 END IF
1837 GOTO 111
1838 END IF
1839C
1840C ---------------------------------------
1841C Dimension cyclic send buffer for normal
1842C messages, based on largest message
1843C size during forward and backward solves
1844C ---------------------------------------
1845C Compute buffer size in BYTES (CMUMPS_LBUF)
1846C using integer8 in CMUMPS_LBUF_8
1847C then convert it in integer4 and bound it to largest integer value
1848C
1849 cmumps_lbuf_8 =
1850 & (int(msg_max_bytes_solve,8)+2_8*int(keep(34),8))*
1851 & int(id%NSLAVES,8)
1852C Avoid buffers larger than 100 Mbytes ...
1853 cmumps_lbuf_8 = min(cmumps_lbuf_8, 100000000_8)
1854C ... as long as we can send messages to at least 3
1855C destinations simultaneously
1856 cmumps_lbuf_8 = max(cmumps_lbuf_8,
1857 & int((msg_max_bytes_solve+2*keep(34)),8) *
1858 & int(min(id%NSLAVES,3),8) )
1859 cmumps_lbuf_8 = cmumps_lbuf_8 + 2_8*int(keep(34),8)
1860C Convert to integer and bound it to largest 32-bit integer
1861C and suppress 10 integers (one should be enough!)
1862C to enable computation of integer size.
1863 cmumps_lbuf_8 = min(cmumps_lbuf_8,
1864 & int(huge(i4),8)
1865 & - 10_8*int(keep(34),8)
1866 & )
1867 cmumps_lbuf = int(cmumps_lbuf_8, kind(cmumps_lbuf))
1868 CALL cmumps_buf_alloc_cb( cmumps_lbuf, ierr )
1869 IF ( ierr .NE. 0 ) THEN
1870 info(1) = -13
1871 info(2) = cmumps_lbuf/keep(34) + 1
1872 IF ( lpok) THEN
1873 WRITE(lp,*) id%MYID,
1874 & ':Error allocating Send buffer:IERR=', ierr
1875 END IF
1876 GOTO 111
1877 END IF
1878C
1879C
1880C -- end of I am slave
1881 ENDIF
1882C
1883 IF ( postpros ) THEN
1884C When Iterative refinement of error analysis requested
1885C Allocate RHS_IR on slave processors
1886C (note that on MASTER RHS_IR points to RHS)
1887 IF ( id%MYID .NE. master ) THEN
1888C
1889 ALLOCATE(rhs_ir(id%N),stat=ierr)
1890 nb_bytes = nb_bytes + int(size(rhs_ir),8)*k35_8
1891 nb_bytes_max = max(nb_bytes_max,nb_bytes)
1892 IF ( ierr .GT. 0 ) THEN
1893 info(1)=-13
1894 info(2)=id%N
1895 IF (lpok) THEN
1896 WRITE(lp,*) 'ERROR while allocating RHS on a slave'
1897 ENDIF
1898 GOTO 111
1899 END IF
1900 ELSE
1901 rhs_ir=>id%RHS
1902 ENDIF
1903 ENDIF
1904C
1905C Parallel A-1 or General sparse and
1906C exploit sparsity between columns
1907 do_nbsparse = ( ( (keep(237).NE.0).OR.(keep(235).NE.0) )
1908 & .AND.
1909 & ( keep(497).NE.0 )
1910 & )
1911 IF ( i_am_slave ) THEN
1912 IF(do_nbsparse) THEN
1913c --- ALLOCATE outside loop RHS_BOUNDS is needed
1914 lptr_rhs_bounds = 2*keep(28)
1915 ALLOCATE(rhs_bounds(lptr_rhs_bounds), stat=ierr)
1916 IF (ierr.GT.0) THEN
1917 info(1)=-13
1918 info(2)=lptr_rhs_bounds
1919 IF (lpok) THEN
1920 WRITE(lp,*) 'ERROR while allocating RHS_BOUNDS on',
1921 & ' a slave'
1922 ENDIF
1923 GOTO 111
1924 END IF
1925 nb_bytes = nb_bytes +
1926 & int(size(rhs_bounds),8)*k34_8
1927 nb_bytes_max = max(nb_bytes_max,nb_bytes)
1928 ptr_rhs_bounds => rhs_bounds
1929 ELSE
1930 lptr_rhs_bounds = 1
1931 ptr_rhs_bounds => idummy_target
1932 ENDIF
1933 ENDIF
1934C --------------------------------------------------
1935 IF ( i_am_slave ) THEN
1936 IF ((keep(221).EQ.2 .AND. keep(252).EQ.0)) THEN
1937C -- RHSCOMP must have been allocated in
1938C -- previous solve step (with option KEEP(221)=1)
1939 IF (.NOT.associated(id%RHSCOMP)) THEN
1940 info(1) = -35
1941 info(2) = 1
1942 GOTO 111
1943 ENDIF
1944C IF ((KEEP(248).EQ.0) .OR. (id%NRHS.EQ.1)) THEN
1945C POSINRHSCOMP_ROW/COL are meaningful and could even be reused
1946 IF (.NOT.associated(id%POSINRHSCOMP_ROW) ) ! .OR.
1947! & .NOT.(id%POSINRHSCOMP_COL_ALLOC))
1948 & THEN
1949 info(1) = -35
1950 info(2) = 2
1951 GOTO 111
1952 ENDIF
1953 IF (.not.id%POSINRHSCOMP_COL_ALLOC) THEN
1954C POSINRHSCOMP_COL that is kept from
1955C previous call to solve must then (already)
1956C point to id%POSINRHSCOMP_ROW
1957 id%POSINRHSCOMP_COL => id%POSINRHSCOMP_ROW
1958 ENDIF
1959 ELSE
1960C ----------------------
1961C Allocate POSINRHSCOMP_ROW/COL
1962C ----------------------
1963C The size of POSINRHSCOMP arrays
1964C does not depend on the block of RHS
1965C POSINRHSCOMP_ROW/COL are initialized in the loop of RHS
1966 IF (associated(id%POSINRHSCOMP_ROW)) THEN
1967 nb_bytes = nb_bytes -
1968 & int(size(id%POSINRHSCOMP_ROW),8)*k34_8
1969 DEALLOCATE(id%POSINRHSCOMP_ROW)
1970 ENDIF
1971 ALLOCATE (id%POSINRHSCOMP_ROW(id%N), stat = allocok)
1972 IF ( allocok .GT. 0 ) THEN
1973 info(1)=-13
1974 info(2)=id%N
1975 GOTO 111
1976 END IF
1977 nb_bytes = nb_bytes +
1978 & int(size(id%POSINRHSCOMP_ROW),8)*k34_8
1979 nb_bytes_max = max(nb_bytes_max,nb_bytes)
1980 IF (id%POSINRHSCOMP_COL_ALLOC) THEN
1981 nb_bytes = nb_bytes -
1982 & int(size(id%POSINRHSCOMP_COL),8)*k34_8
1983 DEALLOCATE(id%POSINRHSCOMP_COL)
1984 NULLIFY(id%POSINRHSCOMP_COL)
1985 id%POSINRHSCOMP_COL_ALLOC = .false.
1986 ENDIF
1987C
1988 IF ((keep(50).EQ.0).OR.keep(237).NE.0) THEN
1989 ALLOCATE (id%POSINRHSCOMP_COL(id%N), stat = allocok)
1990 IF ( allocok .GT. 0 ) THEN
1991 info(1)=-13
1992 info(2)=id%N
1993 GOTO 111
1994 END IF
1995 id%POSINRHSCOMP_COL_ALLOC = .true.
1996 nb_bytes = nb_bytes +
1997 & int(size(id%POSINRHSCOMP_COL),8)*k34_8
1998 nb_bytes_max = max(nb_bytes_max,nb_bytes)
1999 ELSE
2000C Do no allocate POSINRHSCOMP_COL
2001 id%POSINRHSCOMP_COL => id%POSINRHSCOMP_ROW
2002 id%POSINRHSCOMP_COL_ALLOC = .false.
2003 ENDIF
2004 IF (keep(221).NE.2) THEN
2005C -- only in the case of bwd after reduced RHS
2006C -- we have to keep "old" RHSCOMP
2007 IF (associated(id%RHSCOMP)) THEN
2008 nb_bytes = nb_bytes - id%KEEP8(25)*k35_8
2009 DEALLOCATE(id%RHSCOMP)
2010 NULLIFY(id%RHSCOMP)
2011 id%KEEP8(25)=0_8
2012 ENDIF
2013 ENDIF
2014 ENDIF
2015C ---------------------------
2016C Allocate local workspace
2017C for the solve (CMUMPS_SOL_C)
2018C ---------------------------
2019 liwk_solve = 2 * keep(28) + id%NA(1)+1
2020 liwk_ptracb= keep(28)
2021C KEEP(228)+1 temporary integer positions
2022C will be needed in CMUMPS_SOL_S
2023 IF (keep(201).EQ.1) THEN
2024 liwk_solve = liwk_solve + keep(228) + 1
2025 ELSE
2026C Reserve 1 position to pass array of size 1 in routines
2027 liwk_solve = liwk_solve + 1
2028 ENDIF
2029 ALLOCATE ( iwk_solve(liwk_solve),
2030 & ptracb(liwk_ptracb), stat = allocok )
2031 IF (allocok .GT. 0 ) THEN
2032 info(1)=-13
2033 info(2)=liwk_solve + liwk_ptracb*keep(10)
2034 GOTO 111
2035 END IF
2036 nb_bytes = nb_bytes + int(liwk_solve,8)*k34_8 +
2037 & int(liwk_ptracb,8)*k34_8 *int(keep(10),8)
2038 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2039C array IWCB used temporarily to hold
2040C indices of a front unpacked from a message
2041C and to stack (potentially in a recursive call)
2042C headers of size 2 positions of CB blocks.
2043 liwcb = 20*nb_k133*2 + keep(133)
2044 ALLOCATE ( iwcb( liwcb), stat = allocok )
2045 IF (allocok .GT. 0 ) THEN
2046 info(1)=-13
2047 info(2)=liwcb
2048 GOTO 111
2049 END IF
2050 nb_bytes = nb_bytes + int(liwcb,8)*k34_8
2051 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2052C
2053C -- Code for a slave
2054C -----------
2055C Subdivision
2056C of array IS
2057C -----------
2058 liw = keep(32)
2059C Define a work array of size maximum global frontal
2060C size (KEEP(133)) for the call to CMUMPS_SOL_C
2061C This used to be of size id%N.
2062 ALLOCATE(srw3(keep(133)), stat = allocok )
2063 IF ( allocok .GT. 0 ) THEN
2064 info(1)=-13
2065 info(2)=keep(133)
2066 GOTO 111
2067 END IF
2068 nb_bytes = nb_bytes + int(size(srw3),8)*k35_8
2069 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2070C -----------------
2071C End of slave code
2072C -----------------
2073 ELSE
2074C I am the master with host not working
2075C
2076C LIW is used on master when calling
2077C the routine CMUMPS_GATHER_SOLUTION.
2078 liw=0
2079 END IF
2080C
2081C Precompute inverse of UNS_PERM outside loop
2082 IF (allocated(uns_perm_inv)) DEALLOCATE(uns_perm_inv)
2083 uns_perm_inv_needed_inmainloop = .false.
2084 IF ( ( id%MYID .eq. master.AND.(keep(23).GT.0) .AND.
2085 & (mtype .NE. 1).AND.(keep(248).NE.0)
2086 & )
2087C Permute UNS_PERM on master only with
2088C sparse RHS (KEEP(248).NE.0 ) when AT x = b is solved
2089 & .OR. ( keep(237).NE.0 .AND. keep(23).NE.0 )
2090C When A-1 is active and when the matrix is unsymmetric
2091C and a column permutation has been applied (Max transversal)
2092C then we have performed a
2093C factorization of a column permuted matrix AQ = LU.
2094C In this case,
2095C the permuted entry must be used to select the target
2096C entries for the BWD (note that a diagonal entry of A-1
2097C is not anymore a diagonal of AQ. Thus a diagonal
2098C of A-1 does not correspond to the same path
2099C in the tree during FWD and BWD steps when MAXTRANS is on
2100C and permutation is not identity.)
2101C Note that the inverse permutation
2102C UNS_PERM_INV needs to be allocated on each proc
2103C since it is used in CMUMPS_SOL_C routine for pruning.
2104C It is allocated only once and its allocation has been
2105C migrated outside the blocking on the right hand sides.
2106 & ) THEN
2107 uns_perm_inv_needed_inmainloop = .true.
2108 ENDIF
2109 uns_perm_inv_needed_befmainloop = .false.
2110 IF ( keep(23) .GT.0 .AND.
2111 & mtype .NE. 1 .AND. keep(248).EQ.-1 ) THEN
2112C Similar to sparse RHS case, we need to modify IRHS_loc
2113C indices in the distributed RHS case. However, we need
2114C UNS_PERM_INV on all processors. But only before theC
2115C main loop on the RHS blocks.
2116 uns_perm_inv_needed_befmainloop = .true.
2117 ENDIF
2118 IF ( uns_perm_inv_needed_inmainloop .OR.
2119 & uns_perm_inv_needed_befmainloop ) THEN
2120 ALLOCATE(uns_perm_inv(id%N),stat=allocok)
2121 if (allocok .GT.0 ) THEN
2122 info(1)=-13
2123 info(2)=id%N
2124 GOTO 111
2125 endif
2126 nb_bytes = nb_bytes + int(id%N,8)*k34_8
2127 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2128 IF (id%MYID.EQ.master) THEN
2129C Build inverse permutation
2130 DO i = 1, id%N
2131 uns_perm_inv(id%UNS_PERM(i))=i
2132 ENDDO
2133 ENDIF
2134C
2135 ELSE
2136 ALLOCATE(uns_perm_inv(1), stat=allocok)
2137 if (allocok .GT.0 ) THEN
2138 info(1)=-13
2139 info(2)=1
2140 GOTO 111
2141 endif
2142 nb_bytes = nb_bytes + 1_8*k34_8
2143 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2144 ENDIF
2145C
2146 111 CONTINUE
2147#if defined(V_T)
2148 CALL vtend(glob_comm_ini,ierr)
2149#endif
2150C
2151C Synchro point + Broadcast of errors
2152C
2153C Propagate errors
2154 CALL mumps_propinfo( icntl(1), info(1),
2155 & id%COMM,id%MYID)
2156 IF (info(1) .LT.0 ) GOTO 90
2157C
2158C UNS_PERM_INV needed on slaves:
2159 IF ( keep(23).NE.0 .AND.
2160 & ( keep(237).NE.0 .OR.
2161 & ( mtype.NE.1 .AND. keep(248).EQ.-1 ) ) ) THEN
2162C Broadcast UNS_PERM_INV
2163 CALL mpi_bcast( uns_perm_inv,id%N,mpi_integer,master,
2164 & id%COMM,ierr )
2165 ENDIF
2166C -------------------------------
2167C BEGIN
2168C Preparation for distributed RHS
2169C -------------------------------
2170 IF (i_am_slave .AND. keep(248).EQ.-1) THEN
2171C Distributed RHS case
2172 ALLOCATE(map_rhs_loc(max(id%Nloc_RHS,1)), stat=allocok)
2173 IF (allocok .GT. 0) THEN
2174 id%INFO(1)=-13
2175 id%INFO(2)=max(id%Nloc_RHS,1)
2176 GOTO 20
2177 ENDIF
2178 nb_bytes = nb_bytes + max(int(id%Nloc_RHS,8),1_8)*k34_8
2179 ENDIF
2180C MAP_RHS_loc will be built in the main
2181C loop, when processing the first block.
2182C It requires POSINRHSCOMP to be built.
2183 build_rhsmapinfo = .true.
2184 20 CONTINUE
2185 CALL mumps_propinfo( icntl(1), info(1),
2186 & id%COMM,id%MYID)
2187 IF ( info(1) .LT.0 ) GOTO 90
2188C In case of Unsymmetric column permutation and
2189C transpose system, use MUMPS internal indices
2190C for IRHS_loc_PTR. Done before scaling since
2191C scaling is on permuted matrix
2192 IF ( i_am_slave .AND. keep(23).GT.0 .AND. keep(248).EQ.-1
2193 & .AND. mtype.NE.1 ) THEN
2194 IF (id%Nloc_RHS .GT. 0) THEN
2195 ALLOCATE(irhs_loc_ptr(id%Nloc_RHS),stat=allocok)
2196 IF (allocok.GT.0) THEN
2197 info(1)=-13
2198 info(2)=id%Nloc_RHS
2199 GOTO 25
2200 ENDIF
2201 irhs_loc_ptr_allocated = .true.
2202 nb_bytes = nb_bytes + max(int(id%Nloc_RHS,8),1_8)*k34_8
2203 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2204 DO i=1, id%Nloc_RHS
2205 IF (id%IRHS_loc(i).GE.1 .AND. id%IRHS_loc(i).LE.id%N)
2206 & THEN
2207 irhs_loc_ptr(i)=uns_perm_inv(id%IRHS_loc(i))
2208 ELSE
2209C Keep track of out-of range entries
2210 irhs_loc_ptr(i)=id%IRHS_loc(i)
2211 ENDIF
2212 ENDDO
2213 ENDIF
2214 ENDIF
2215C Check if UNS_PERM_INV still needed
2216C to free memory
2217 IF (uns_perm_inv_needed_befmainloop .AND.
2218 & .NOT. uns_perm_inv_needed_inmainloop) THEN
2219 nb_bytes = nb_bytes - int(size(uns_perm_inv),8)*k34_8
2220 DEALLOCATE(uns_perm_inv)
2221 ALLOCATE(uns_perm_inv(1)) ! to posibly pass it as an argument
2222 nb_bytes = nb_bytes + k34_8
2223 ENDIF
2224 IF (lscal .AND. id%KEEP(248).EQ.-1) THEN
2225C Scaling done based on original indices
2226C provided by user
2227 IF (mtype == 1) THEN
2228C No transpose
2229 scaling_data_dr%SCALING=>id%ROWSCA
2230 ELSE
2231C Transpose
2232 scaling_data_dr%SCALING=>id%COLSCA
2233 ENDIF
2234 CALL cmumps_set_scaling_loc( scaling_data_dr, id%N,
2235 & irhs_loc_ptr(1), id%Nloc_RHS,
2236 & id%COMM, id%MYID, i_am_slave, master,
2237 & nb_bytes, nb_bytes_max, k16_8, lp, lpok,
2238 & icntl(1), info(1) )
2239 ENDIF
2240C -------------------------------
2241C END
2242C Preparation for distributed RHS
2243C -------------------------------
2244 25 CONTINUE
2245 CALL mumps_propinfo( icntl(1), info(1),
2246 & id%COMM,id%MYID)
2247 IF ( info(1) .LT.0 ) GOTO 90
2248C -------------------------------------
2249C BEGIN
2250C Preparation for distributed solution
2251C -------------------------------------
2252 IF ( icntl21==1 ) THEN
2253 IF (lscal) THEN
2254C In case of scaling we will need to scale
2255C back the sol. Put the values of the scaling
2256C arrays needed to do that on each processor.
2257 IF (id%MYID.NE.master) THEN
2258 IF (mtype == 1) THEN
2259 ALLOCATE(id%COLSCA(id%N),stat=allocok)
2260 ELSE
2261 ALLOCATE(id%ROWSCA(id%N),stat=allocok)
2262 ENDIF
2263 IF (allocok > 0) THEN
2264 IF (lpok) THEN
2265 WRITE(lp,*) 'Error allocating temporary scaling array'
2266 ENDIF
2267 info(1)=-13
2268 info(2)=id%N
2269 GOTO 37
2270 ENDIF
2271 nb_bytes = nb_bytes + int(id%N,8)*k16_8
2272 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2273 ENDIF ! MYID .NE. MASTER
2274 37 CALL mumps_propinfo( icntl(1), info(1),
2275 & id%COMM,id%MYID)
2276 IF (info(1) .LT.0 ) GOTO 90
2277 IF (i_am_slave) THEN
2278 ALLOCATE(scaling_data_sol%SCALING_LOC(id%KEEP(89)),
2279 & stat=allocok)
2280 IF (allocok > 0) THEN
2281 IF (lpok) THEN
2282 WRITE(lp,*) 'Error allocating local scaling array'
2283 ENDIF
2284 info(1)=-13
2285 info(2)=id%KEEP(89)
2286 GOTO 38
2287 ENDIF
2288 nb_bytes = nb_bytes + int(id%KEEP(89),8)*k16_8
2289 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2290 ENDIF ! I_AM_SLAVE
2291 38 CONTINUE
2292 CALL mumps_propinfo( icntl(1), info(1),
2293 & id%COMM,id%MYID)
2294 IF (info(1) .LT.0 ) THEN
2295 GOTO 90
2296 ENDIF
2297 IF (mtype == 1) THEN
2298 CALL mpi_bcast(id%COLSCA(1),id%N,
2299 & mpi_real,master,
2300 & id%COMM,ierr)
2301 scaling_data_sol%SCALING=>id%COLSCA
2302 ELSE
2303 CALL mpi_bcast(id%ROWSCA(1),id%N,
2304 & mpi_real,master,
2305 & id%COMM,ierr)
2306 scaling_data_sol%SCALING=>id%ROWSCA
2307 ENDIF
2308 ENDIF ! LSCAL
2309 IF ( i_am_slave ) THEN
2310C ----------------------
2311C Prepare ISOL_loc array
2312C ----------------------
2313 liw_passed=max(1,liw)
2314C Only called if more than 1 pivot
2315C was eliminated by the processor.
2316C Note that LSOL_loc >= KEEP(89)
2317 IF (keep(89) .GT. 0) THEN
2318 CALL cmumps_distsol_indices( mtype, id%ISOL_loc(1),
2319 & id%PTLUST_S(1),
2320 & id%KEEP(1),id%KEEP8(1),
2321 & id%IS(1), liw_passed,id%MYID_NODES,
2322 & id%N, id%STEP(1), id%PROCNODE_STEPS(1),
2323 & id%NSLAVES, scaling_data_sol, lscal
2324C For checking only
2325 & , (keep(248).EQ.-1), irhs_loc_ptr(1), id%Nloc_RHS
2326 & )
2327 ENDIF
2328 IF (id%MYID.NE.master .AND. lscal) THEN
2329C ---------------------------------
2330C Local (small) scaling arrays have
2331C been built, free temporary copies
2332C ---------------------------------
2333 IF (mtype == 1) THEN
2334 DEALLOCATE(id%COLSCA)
2335 NULLIFY(id%COLSCA)
2336 ELSE
2337 DEALLOCATE(id%ROWSCA)
2338 NULLIFY(id%ROWSCA)
2339 ENDIF
2340 nb_bytes = nb_bytes - int(id%N,8)*k16_8
2341 ENDIF
2342 ENDIF ! I_AM_SLAVE
2343 IF (keep(23) .NE. 0 .AND. mtype==1) THEN
2344C Broadcast the unsymmetric permutation and
2345C permute the indices in ISOL_loc
2346 IF (id%MYID.NE.master) THEN
2347 ALLOCATE(id%UNS_PERM(id%N),stat=allocok)
2348 IF (allocok > 0) THEN
2349 info(1)=-13
2350 info(2)=id%N
2351 GOTO 40
2352 ENDIF
2353 ENDIF
2354 ENDIF
2355C
2356C ===================== ERROR handling and propagation ================
2357 40 CONTINUE
2358 CALL mumps_propinfo( icntl(1), info(1),
2359 & id%COMM,id%MYID)
2360 IF (info(1) .LT.0 ) GOTO 90
2361C ======================================================================
2362C
2363 IF (keep(23) .NE. 0 .AND. mtype==1) THEN
2364 CALL mpi_bcast(id%UNS_PERM(1),id%N,mpi_integer,master,
2365 & id%COMM,ierr)
2366 IF (i_am_slave) THEN
2367 DO i=1, keep(89)
2368 id%ISOL_loc(i) = id%UNS_PERM(id%ISOL_loc(i))
2369 ENDDO
2370 ENDIF
2371 IF (id%MYID.NE.master) THEN
2372 DEALLOCATE(id%UNS_PERM)
2373 NULLIFY(id%UNS_PERM)
2374 ENDIF
2375 ENDIF
2376 ENDIF ! ICNTL(21)=1
2377C --------------------------------------
2378C Preparation for distributed solution
2379C END
2380C --------------------------------------
2381C ----------------------------
2382C Preparation for reduced RHS
2383C ----------------------------
2384 IF ( ( keep(221) .EQ. 1 ) .OR.
2385 & ( keep(221) .EQ. 2 )
2386 & ) THEN
2387C -- First compute MASTER_ROOT_IN_COMM proc number in
2388C COMM_NODES on which is mapped the master of the root.
2389 IF (keep(46).EQ.1) THEN
2390 master_root_in_comm=master_root
2391 ELSE
2392 master_root_in_comm =master_root+1
2393 ENDIF
2394 IF ( id%MYID .EQ. master ) THEN
2395C --------------------------------
2396C Avoid using LREDRHS when id%NRHS is
2397C equal to 1, as was done for RHS
2398C --------------------------------
2399 IF (id%NRHS.EQ.1) THEN
2400 ld_redrhs = id%KEEP(116)
2401 ELSE
2402 ld_redrhs = id%LREDRHS
2403 ENDIF
2404 ENDIF
2405 IF (master.NE.master_root_in_comm) THEN
2406C -- Make available LD_REDRHS on MASTER_ROOT_IN_COMM
2407C This will then be used to test if a single
2408C message can be sent
2409C (this is possible if LD_REDRHS=SIZE_SCHUR)
2410 IF ( id%MYID .EQ. master ) THEN
2411C -- send LD_REDRHS to MASTER_ROOT_IN_COMM
2412C using COMM communicator
2413 CALL mpi_send(ld_redrhs,1,mpi_integer,
2414 & master_root_in_comm, 0, id%COMM,ierr)
2415 ELSEIF ( id%MYID.EQ.master_root_in_comm) THEN
2416C -- recv LD_REDRHS
2417 CALL mpi_recv(ld_redrhs,1,mpi_integer,
2418 & master, 0, id%COMM,status,ierr)
2419 ENDIF
2420C -- other procs not concerned
2421 ENDIF
2422 ENDIF
2423C
2424 IF ( keep(248)==1 ) THEN ! Sparse RHS (A-1 or general sparse)
2425! JBEG_RHS - current starting column within A-1 or sparse rhs
2426! set in the loop below and used to obtain the
2427! global index of the column of the sparse RHS
2428! Also used to get index in global permutation.
2429! It also allows to skip empty columns;
2430 jend_rhs = 0 ! last column in current blockin A-1
2431C
2432C Compute and apply permutations
2433 IF (do_permute_rhs) THEN
2434C Allocate PERM_RHS
2435 ALLOCATE(perm_rhs(id%NRHS),stat=allocok)
2436 IF (allocok > 0) THEN
2437 info(1) = -13
2438 info(2) = id%NRHS
2439 GOTO 109
2440 ENDIF
2441 nb_bytes = nb_bytes + int(id%NRHS,8)*k34_8
2442 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2443 IF (id%MYID.EQ.master) THEN
2444C PERM_RHS is computed on MASTER, it might be modified
2445C in case of interleaving and will thus be distributed
2446C (BCAST) to all slaves only later.
2447C Compute PERM_RHS
2448C on output: PERM_RHS(k) = i means that i is the kth column
2449C to be processed
2450 IF (keep(237).EQ.0) THEN
2451C Permute RHS : case of GS (General Sparse) RHS
2452C IRHS_SPARSE is of size at least NZ_RHS > 0
2453C since all this is skipped when NZ_RHS=0. So
2454C accessing IRHS_SPARSE(1) is ok.
2456 & lp, lpok, prokg, mpg, keep(242),
2457 & id%SYM_PERM(1), id%N, id%NRHS,
2458 & id%IRHS_PTR(1), id%NRHS+1,
2459 & id%IRHS_SPARSE(1), id%NZ_RHS,
2460 & perm_rhs, ierr)
2461 IF (ierr.LT.0) THEN
2462 info(1) = -9999
2463 info(2) = ierr
2464 GOTO 109 ! propagate error
2465 ENDIF
2466 ELSE
2467C Case of A-1 :
2468C We compute the permutation of the RHS (sparse matrix)
2469C (to compute all inverse entries)
2470C We apply permutation to IRHS_SPARSE ONLY.
2471C Note NRHS_NONEMPTY holds the nb of non empty columns
2472C in A-1.
2473 strat_permam1 = keep(242)
2475 & (strat_permam1, id%SYM_PERM(1),
2476 & id%IRHS_PTR(1), id%NRHS+1,
2477 & perm_rhs, id%NRHS,
2478 & ierr
2479 & )
2480 ENDIF
2481 ENDIF
2482 ENDIF
2483 ENDIF
2484C
2485C Note that within CMUMPS_SOL_C, PERM_RHS could be used
2486C for A-1 case (with DO_PERMUTE_RHS OR INTERLEAVE_RHS
2487C being tested) to get the column index for the
2488C original matrix of RHS (column index in A-1)
2489C of the permuted columns that have been selected.
2490C PERM_RHS is also used in CMUMPS_GATHER_SOLUTION
2491C in case of sparse RHS awith DO_PERMUTE_RHS.
2492C
2493C Allocate PERM_RHS of size 1 if not allocated
2494 IF (.NOT. allocated(perm_rhs)) THEN
2495 ALLOCATE(perm_rhs(1),stat=allocok)
2496 IF (allocok > 0) THEN
2497 info(1) = -13
2498 info(2) = 1
2499 GOTO 109
2500 ENDIF
2501 nb_bytes = nb_bytes + int(size(perm_rhs),8)*k34_8
2502 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2503 ENDIF
2504C Propagate errors
2505109 CALL mumps_propinfo( icntl(1), info(1),
2506 & id%COMM,id%MYID)
2507 IF (info(1) .LT.0 ) GOTO 90
2508c --------------------------
2509c --------------------------
2510 IF (id%NSLAVES .EQ. 1) THEN
2511c - In case of NS/A-1 we may want to permute RHS
2512c - for NS thus is to apply permutation to PIVNUL_LIST
2513* - before starting loop of NBRHS
2514 IF (do_permute_rhs .AND. keep(111).NE.0 ) THEN
2515C NOTE:
2516C when host not working both master and slaves have
2517C in this case the complete list
2518 WRITE(*,*) id%MYID, ':INTERNAL ERROR 1 : ',
2519 & ' PERMUTE RHS during null space computation ',
2520 & ' not available yet '
2521 CALL mumps_abort()
2522 ENDIF ! End Permute_RHS
2523 ELSE
2524 IF (do_permute_rhs .AND. keep(111).NE.0 ) THEN
2525 WRITE(*,*) id%MYID, ':INTERNAL ERROR 2 : ',
2526 & ' PERMUTE RHS during null space computation ',
2527 & ' not available yet '
2528 CALL mumps_abort()
2529C
2530C
2531 ENDIF ! End DO_PERMUTE_RHS
2532 IF (interleave_par.AND. (keep(111).NE.0)) THEN
2533 WRITE(*,*) id%MYID, ':INTERNAL ERROR 3 : ',
2534 & ' INTERLEAVE RHS during null space computation ',
2535 & ' not available yet '
2536 CALL mumps_abort()
2537 ENDIF
2538 IF (interleave_par.AND.keep(111).EQ.0) THEN
2539C - A-1 + Interleave:
2540C permute RHS on master
2541 IF (id%MYID.EQ.master) THEN
2542C -- PERM_RHS must have been already set or initialized
2543C -- it is then modified in next routine
2544 size_working = id%IPTR_WORKING(id%NPROCS+1)-1
2545 size_iptr_working = id%NPROCS+1
2547 & perm_rhs, id%NRHS,
2548 & id%IPTR_WORKING(1), size_iptr_working,
2549 & id%WORKING(1), size_working,
2550 & id%IRHS_PTR(1),
2551 & id%STEP(1), id%SYM_PERM(1), id%N, nbrhs,
2552 & id%PROCNODE_STEPS(1), keep(28), id%NSLAVES,
2553 & keep(199),
2554 & keep(493).NE.0,
2555 & keep(495).NE.0, keep(496), prokg, mpg
2556 & )
2557 ENDIF ! End Master
2558 ENDIF ! End A-1 and interleave_par
2559C -------------
2560 ENDIF ! End Parallel Case
2561c --------------------------
2562c
2563 IF (do_permute_rhs.AND.(keep(111).EQ.0)) THEN
2564C --- Distribute PERM_RHS before loop of RHS
2565C --- (with null space option PERM_RHS is not allocated / needed
2566C to permute the null column pivot list)
2567 CALL mpi_bcast(perm_rhs(1),
2568 & id%NRHS,
2569 & mpi_integer,
2570 & master, id%COMM,ierr)
2571 ENDIF
2572C L0-threads to be activated iff KEEP(401)>0 and KEEP(400)>0
2573 IF (keep(401) .GT. 0) THEN
2574C L0-threads was requested for solve phase
2575C and will be effective only if KEEP(400) >0
2576C which indicates that L0-threads was
2577C performed during analysis+factorization
2578 IF ( keep(400) .GT. 0 ) THEN
2579C Check if number of threads is consistent with
2580C the one used during factorization
2581C{
2582 nomp = 1
2583!$ NOMP=omp_get_max_threads()
2584 IF (keep(400).NE.nomp) THEN
2585C NOMP should be the one from analysis
2586 id%INFO(1) = -58
2587 id%INFO(2) = keep(400)
2588 IF (lpok) WRITE(lp,'(A,A,I5,A,I5)')
2589 &" FAILURE DETECTED IN SOLVE: #threads for KEEP(401)",
2590 &" changed from",keep(400)," at analysis to", nomp
2591 ENDIF
2592 ENDIF
2593C}
2594 ENDIF
2595 IF (keep(400) .GT. 0) THEN
2596 CALL cmumps_sol_l0omp_li(keep(400))
2597 ENDIF
2598C ==============================
2599C BLOCKING ON the number of RHS
2600C We work on a maximum of NBRHS at a time.
2601C the leading dimension of RHS is id%LRHS on master
2602C and is set to N on slaves
2603C ==============================
2604C We may want to allow to have NBRHS that varies
2605C this is typically the case when a partitionning of
2606C the right hand side is performed and leads to
2607C irregular partitions.
2608C We only have to be sure that the size of each partition
2609C is smaller than NBRHS.
2610 beg_rhs=1
2611 DO WHILE (beg_rhs.LE.nrhs_nonempty)
2612C {
2613C ==========================
2614C -- NBRHS : Original block size
2615C -- BEG_RHS : Column index of the first RHS in the list of
2616C non empty RHS (RHS_LOC) to
2617C be processed during this iteration
2618C -- NBRHS_EFF : Effective block size at current iteration
2619C In case of sparse RHS (KEEP(248)==1) NBRHS_EFF only refers to
2620C non-empty columns and is used to compute NBCOL_INBLOC
2621C -- NBCOL_INBLOC : the number of columns of sparse RHS needed
2622C to get NBRHS_EFF non empty columns columns of
2623C sparse RHS processed at each step
2624C
2625 nbrhs_eff = min(nrhs_nonempty-beg_rhs+1, nbrhs)
2626C
2627C Sparse RHS
2628C Free space and reset pointers if needed
2629 IF (irhs_sparse_copy_allocated) THEN
2630 nb_bytes = nb_bytes -
2631 & int(size(irhs_sparse_copy),8)*k34_8
2632 DEALLOCATE(irhs_sparse_copy)
2633 irhs_sparse_copy_allocated=.false.
2634 NULLIFY(irhs_sparse_copy)
2635 ENDIF
2636 IF (irhs_ptr_copy_allocated) THEN
2637 nb_bytes = nb_bytes -
2638 & int(size(irhs_ptr_copy),8)*k34_8
2639 DEALLOCATE(irhs_ptr_copy)
2640 irhs_ptr_copy_allocated=.false.
2641 NULLIFY(irhs_ptr_copy)
2642 ENDIF
2643 IF (rhs_sparse_copy_allocated) THEN
2644 nb_bytes = nb_bytes -
2645 & int(size(rhs_sparse_copy),8)*k35_8
2646 DEALLOCATE(rhs_sparse_copy)
2647 rhs_sparse_copy_allocated=.false.
2648 NULLIFY(rhs_sparse_copy)
2649 ENDIF
2650C
2651C ===========================================================
2652C Set LD_RHS and IBEG for the accesses to id%RHS (in cases
2653C id%RHS is accessed). Remark that IBEG might still be
2654C overwritten later, in case of general sparse right-hand side
2655C and centralized solution to skip empty columns
2656C ===========================================================
2657 IF (
2658C slave procs
2659 & ( id%MYID .NE. master )
2660C even on master when RHS not allocated
2661 & .or.
2662C Case of Master working but with distributed sol and
2663C ( sparse RHS or null space )
2664C -- Allocate not needed on host not working
2665 & ( i_am_slave .AND. id%MYID .EQ. master .AND.
2666 & icntl21 .NE.0 .AND.
2667 & ( keep(248).ne.0 .OR. keep(221).EQ.2
2668 & .OR. keep(111).NE.0 )
2669 & )
2670 & .or.
2671C Case of Master and
2672C (compute entries of INV(A))
2673C Even when I am a master with host not working I
2674C am in charge of gathering solution to scale it
2675C and to copy it back in the sparse RHS format
2676 & ( id%MYID .EQ. master .AND. (keep(237).NE.0) )
2677C
2678 & ) THEN
2679 ld_rhs = id%N
2680 ibeg = 1
2681 ELSE
2682 ! (id%MYID .eq. MASTER)
2683 IF ( associated(id%RHS) ) THEN
2684C Leading dimension of RHS on master is id%LRHS
2685 ld_rhs = max(id%LRHS, id%N)
2686 ELSE
2687C --- LRHS might not be defined (dont use it)
2688 ld_rhs = id%N
2689 ENDIF
2690 ibeg = int(beg_rhs-1,8) * int(ld_rhs,8) + 1_8
2691 ENDIF
2692C JBEG_RHS might also be used in DISTRIBUTED_SOLUTION
2693C even when RHS is not sparse on input. In this case,
2694C there are no empty columns. (If RHS is sparse JBEG_RHS
2695C is overwritten).
2696 jbeg_rhs = beg_rhs
2697C ==========================================
2698C Shift empty columns in case of sparse RHS
2699C ==========================================
2700 IF ( (id%MYID.EQ.master) .AND.
2701 & keep(248)==1 ) THEN
2702C update position of JBEG_RHS on first non-empty
2703C column of this block
2704 jbeg_rhs = jend_rhs + 1
2705 IF (do_permute_rhs.OR.interleave_par) THEN
2706 DO WHILE ( id%IRHS_PTR(perm_rhs(jbeg_rhs)) .EQ.
2707 & id%IRHS_PTR(perm_rhs(jbeg_rhs)+1) )
2708C Empty column
2709 IF ((keep(237).EQ.0).AND.(icntl21.EQ.0).AND.
2710 & (keep(221).NE.1) ) THEN
2711C General sparse RHS (NOT A-1) and centralized solution
2712C Set to zero part of the
2713C solution corresponding to empty columns
2714 DO i=1, id%N
2715 id%RHS(int(perm_rhs(jbeg_rhs) -1,8)*int(ld_rhs,8)+
2716 & int(i,8)) = zero
2717 ENDDO
2718 ENDIF
2719 jbeg_rhs = jbeg_rhs +1
2720 ENDDO
2721 ELSE
2722 DO WHILE( id%IRHS_PTR(jbeg_rhs) .EQ.
2723 & id%IRHS_PTR(jbeg_rhs+1) )
2724 IF ((keep(237).EQ.0).AND.(icntl21.EQ.0).AND.
2725 & (keep(221).NE.1) ) THEN
2726C Case of general sparse RHS (NOT A-1) and
2727C centralized solution: set to zero part of
2728C the solution corresponding to empty columns
2729 DO i=1, id%N
2730 id%RHS(int(jbeg_rhs -1,8)*int(ld_rhs,8) +
2731 & int(i,8)) = zero
2732 ENDDO
2733 ENDIF
2734 IF (keep(221).EQ.1) THEN
2735C Reduced RHS set to ZERO
2736 DO i = 1, id%SIZE_SCHUR
2737 id%REDRHS(int(jbeg_rhs-1,8)*int(ld_redrhs,8) +
2738 & int(i,8)) = zero
2739 ENDDO
2740 ENDIF
2741 jbeg_rhs = jbeg_rhs +1
2742 ENDDO
2743 ENDIF ! End DO_PERMUTE_RHS.OR.INTERLEAVE_PAR
2744C Count nb of RHS columns skipped: useful for
2745C * CMUMPS_DISTRIBUTED_SOLUTION to reset those
2746C columns to zero.
2747C * in case of reduced right-hand side, to set
2748C corresponding entries of RHSCOMP to 0 after
2749C forward phase.
2750 nb_rhsskipped = jbeg_rhs - (jend_rhs + 1)
2751 IF ((keep(248).EQ.1).AND.(keep(237).EQ.0)
2752 & .AND. (icntl21.EQ.0))
2753 & THEN
2754 ! case of general sparse rhs with centralized solution,
2755 !set IBEG to shifted columns
2756 ! (after empty columns have been skipped)
2757 ibeg = int(jbeg_rhs-1,8) * int(ld_rhs,8) + 1_8
2758 ENDIF
2759 ENDIF ! of if (id%MYID.EQ.MASTER) .AND. KEEP(248)==1
2760 CALL mpi_bcast( jbeg_rhs, 1, mpi_integer,
2761 & master, id%COMM, ierr )
2762C
2763C Shift on REDRHS in reduced RHS functionality
2764C
2765 IF (id%MYID.EQ.master .AND. keep(221).NE.0) THEN
2766C Initialize IBEG_REDRHS
2767C Note that REDRHS always has id%NRHS Colmuns
2768 ibeg_redrhs= int(jbeg_rhs-1,8)*int(ld_redrhs,8) + 1_8
2769 ELSE
2770 ibeg_redrhs=-142424_8 ! Should not be used
2771 ENDIF
2772C
2773C =====================
2774C BEGIN
2775C Prepare RHS on master
2776C
2777#if defined(V_T)
2778 CALL vtbegin(perm_scal_ini,ierr)
2779#endif
2780 IF (id%MYID .eq. master) THEN
2781C ======================
2782 IF (keep(248)==1) THEN
2783C ======================
2784C
2785C Sparse RHS format ( A-1 or sparse input format)
2786C is provided as input by the user (IRHS_SPARSE ...)
2787C --------------------------------------------------
2788C Compute NZ_THIS_BLOCK and NBCOL_INBLOC
2789C where
2790C NZ_THIS_BLOCK is defined
2791C as the number of entries in the next NBRHS_EFF
2792C non empty columns (note that since they might be permuted
2793C then the following formula is not always valid:
2794C NZ_THIS_BLOCK=id%IRHS_PTR(BEG_RHS+NBRHS_EFF)-
2795C & id%IRHS_PTR(BEG_RHS)
2796C anyway NBCOL_INBLOC also need be computed so going through
2797C columns one at a time is needed.
2798C
2799 nbcol = 0
2800 nbcol_inbloc = 0
2801 nz_this_block = 0
2802C With exploit sparsity we skip empty rows up to reaching
2803C the first non empty column; then we process a block of
2804C maximum size NBRHS_EFF except if we reach another empty
2805C column. (We are not sure to have a copy allocated
2806C and thus cannot compress on the fly, as done naturally
2807C for A-1).
2808 stop_at_next_empty_col = .false.
2809 DO i=jbeg_rhs, id%NRHS
2810 nbcol_inbloc = nbcol_inbloc +1
2811 IF (do_permute_rhs.OR.interleave_par) THEN
2812C PERM_RHS(k) = i means that i is the kth
2813C column to be processed
2814C PERM_RHS should also be defined for
2815C empty columns i in A-1 (PERM_RHS(K) = i)
2816 colsize = id%IRHS_PTR(perm_rhs(i)+1)
2817 & - id%IRHS_PTR(perm_rhs(i))
2818 ELSE
2819 colsize = id%IRHS_PTR(i+1) - id%IRHS_PTR(i)
2820 ENDIF
2821 IF ((.NOT.stop_at_next_empty_col).AND.(colsize.GT.0).AND.
2822 & (keep(237).EQ.0)) THEN
2823C -- set STOP_NEXT_EMPTY_COL only for general
2824C -- sparse case (not AM-1)
2825 stop_at_next_empty_col =.true.
2826 ENDIF
2827 IF (colsize.GT.0
2828 & ) THEN
2829 nbcol = nbcol+1
2830 nz_this_block = nz_this_block + colsize
2831 ELSE IF (stop_at_next_empty_col) THEN
2832C We have reached an empty column with already selected non empty
2833C columns: reduce block size to non empty columns reached so far.
2834 nbcol_inbloc = nbcol_inbloc -1
2835 nbrhs_eff = nbcol
2836 EXIT
2837 ENDIF
2838 IF (nbcol.EQ.nbrhs_eff) EXIT
2839 ENDDO
2840 IF (nz_this_block.EQ.0) THEN
2841 WRITE(*,*) " Internal Error 16 in sol driver NZ_THIS_BLOCK=",
2842 & nz_this_block
2843 CALL mumps_abort()
2844 ENDIF
2845C
2846 IF (nbcol.NE.nbrhs_eff.AND. (keep(237).NE.0)
2847 & .AND.keep(221).NE.1) THEN
2848C With exploit sparsity for general sparse RHS (Not A-1)
2849C we skip empty rows up to reaching
2850C the first non empty column; then we process a block of
2851C maximum size NBRHS_EFF except if we reach another empty
2852C column. (We are not sure to have a copy allocated
2853C and thus cannot compress on the fly, as done naturally
2854C for A-1). Thus NBCOL might be smaller than NBRHS_EFF
2855 WRITE(6,*) ' Internal Error 8 in solution driver ',
2856 & nbcol, nbrhs_eff
2857 call mumps_abort()
2858 ENDIF
2859C -------------------------------------------------------------
2860C
2861 IF (nz_this_block .NE. 0) THEN
2862C -----------------------------------------------------------
2863C We recall that
2864C NBCOL_INBLOC is the number of columns of sparse RHS needed
2865C to get NBRHS_EFF non empty columns:
2866 ALLOCATE(irhs_ptr_copy(nbcol_inbloc+1),stat=allocok)
2867 if (allocok .GT.0 ) then
2868 info(1)=-13
2869 info(2)=nbcol_inbloc+1
2870 GOTO 30
2871 endif
2872 irhs_ptr_copy_allocated = .true.
2873 nb_bytes = nb_bytes + int(nbcol_inbloc+1,8)*k34_8
2874 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2875C
2876 jend_rhs =jbeg_rhs + nbcol_inbloc - 1
2877C -----------------------------------------------------------
2878C Initialize IRHS_PTR_COPY
2879C compute local copy (compressed) of id%IRHS_PTR on Master
2880 IF (do_permute_rhs.OR.interleave_par) THEN
2881 ipos = 1
2882 j = 0
2883 DO i=jbeg_rhs, jbeg_rhs + nbcol_inbloc -1
2884 j = j+1
2885 irhs_ptr_copy(j) = ipos
2886 colsize = id%IRHS_PTR(perm_rhs(i)+1)
2887 & - id%IRHS_PTR(perm_rhs(i))
2888 ipos = ipos + colsize
2889 ENDDO
2890 ELSE
2891 ipos = 1
2892 j = 0
2893 DO i=jbeg_rhs, jbeg_rhs + nbcol_inbloc -1
2894 j = j+1
2895 irhs_ptr_copy(j) = ipos
2896 colsize = id%IRHS_PTR(i+1)
2897 & - id%IRHS_PTR(i)
2898 ipos = ipos + colsize
2899 ENDDO
2900 ENDIF ! End DO_PERMUTE_RHS.OR.INTERLEAVE_PAR
2901 irhs_ptr_copy(nbcol_inbloc+1)= ipos
2902 IF ( ipos-1 .NE. nz_this_block ) THEN
2903 WRITE(*,*) "Error in compressed copy of IRHS_PTR"
2904 ierr = 99
2905 call mumps_abort()
2906 ENDIF
2907C -----------------------------------------------------------
2908C IRHS_SPARSE : do a copy or point to the original indices
2909C
2910C Check whether IRHS_SPARSE_COPY need be allocated
2911 IF (keep(23) .NE. 0 .and. mtype .NE. 1) THEN
2912C AP = LU and At x = b ==> b need be permuted
2913 ALLOCATE(irhs_sparse_copy(nz_this_block)
2914 & ,stat=allocok)
2915 if (allocok .GT.0 ) then
2916 info(1)=-13
2917 info(2)=nz_this_block
2918 GOTO 30
2919 endif
2920 irhs_sparse_copy_allocated=.true.
2921 nb_bytes = nb_bytes + int(nz_this_block,8)*k34_8
2922 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2923 ELSE IF (do_permute_rhs.OR.interleave_par.OR.
2924 & (keep(237).NE.0)) THEN
2925C Columns are not contiguous and need be copied one by one
2926C IRHS_SPARSE_COPY will hold a copy of contiguous permuted
2927C columns so an explicit copy is needed.
2928C IRHS_SPARSE_COPY is also allways allocated with A-1,
2929C to enable receiving during mumps_gather_solution
2930C . on the master in any order.
2931 ALLOCATE(irhs_sparse_copy(nz_this_block),
2932 & stat=allocok)
2933 IF (allocok .GT.0 ) THEN
2934 ierr = 99
2935 GOTO 30
2936 ENDIF
2937 irhs_sparse_copy_allocated=.true.
2938 nb_bytes = nb_bytes + int(nz_this_block,8)*k34_8
2939 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2940C
2941 ENDIF
2942C
2943C Initialize IRHS_SPARSE_COPY
2944 IF (irhs_sparse_copy_allocated) THEN
2945 IF ( do_permute_rhs.OR.interleave_par ) THEN
2946 ipos = 1
2947 DO i=jbeg_rhs, jbeg_rhs + nbcol_inbloc -1
2948 colsize = id%IRHS_PTR(perm_rhs(i)+1)
2949 & - id%IRHS_PTR(perm_rhs(i))
2950 irhs_sparse_copy(ipos:ipos+colsize-1) =
2951 & id%IRHS_SPARSE(id%IRHS_PTR(perm_rhs(i)):
2952 & id%IRHS_PTR(perm_rhs(i)+1) -1)
2953 ipos = ipos + colsize
2954 ENDDO
2955 ELSE
2956 irhs_sparse_copy = id%IRHS_SPARSE(
2957 & id%IRHS_PTR(jbeg_rhs):
2958 & id%IRHS_PTR(jbeg_rhs)+nz_this_block-1)
2959 ENDIF
2960 ELSE
2961 irhs_sparse_copy
2962c * (1:NZ_THIS_BLOCK)
2963 & =>
2964 & id%IRHS_SPARSE(id%IRHS_PTR(jbeg_rhs):
2965 & id%IRHS_PTR(jbeg_rhs)+nz_this_block-1)
2966 ENDIF
2967 IF (lscal.OR.do_permute_rhs.OR.interleave_par.OR.
2968 & (keep(237).NE.0)) THEN
2969C if scaling is on or if columns of the RHS are
2970C permuted then a copy of RHS_SPARSE is needed.
2971C Also always allocated with A-1,
2972c to enable receiving during mumps_gather_solution
2973C on the master in any order.
2974C
2975 ALLOCATE(rhs_sparse_copy(nz_this_block),
2976 & stat=allocok)
2977 IF (allocok .GT.0 ) THEN
2978 info(1)=-13
2979 info(2)=nz_this_block
2980 GOTO 30
2981 ENDIF
2982 rhs_sparse_copy_allocated = .true.
2983 nb_bytes = nb_bytes + int(nz_this_block,8)*k35_8
2984 nb_bytes_max = max(nb_bytes_max,nb_bytes)
2985 ELSE
2986 IF ( keep(248)==1 ) THEN
2987 rhs_sparse_copy
2988c * (1:NZ_THIS_BLOCK)
2989 & => id%RHS_SPARSE(id%IRHS_PTR(jbeg_rhs):
2990 & id%IRHS_PTR(jbeg_rhs)+nz_this_block-1)
2991 ELSE
2992 rhs_sparse_copy
2993c * (1:NZ_THIS_BLOCK)
2994 & => id%RHS_SPARSE(id%IRHS_PTR(beg_rhs):
2995 & id%IRHS_PTR(beg_rhs)+nz_this_block-1)
2996 ENDIF
2997 ENDIF
2998 IF (do_permute_rhs.OR.interleave_par.OR.
2999 & (id%KEEP(237).NE.0)) THEN
3000 IF (id%KEEP(237).NE.0) THEN
3001C --initialized to one; it might be
3002C modified if scaling is on (one first entry in each col is scaled)
3003 rhs_sparse_copy = one
3004 ELSE IF (.NOT. lscal) THEN
3005C -- Columns are not contiguous and need be copied one by one
3006C -- This need not be done if scaling is on because it
3007C -- will done and scaled later.
3008 ipos = 1
3009 DO i=jbeg_rhs, jbeg_rhs + nbcol_inbloc -1
3010 colsize = id%IRHS_PTR(perm_rhs(i)+1)
3011 & - id%IRHS_PTR(perm_rhs(i))
3012 IF (colsize .EQ. 0) cycle
3013 rhs_sparse_copy(ipos:ipos+colsize-1) =
3014 & id%RHS_SPARSE(id%IRHS_PTR(perm_rhs(i)):
3015 & id%IRHS_PTR(perm_rhs(i)+1) -1)
3016 ipos = ipos + colsize
3017 ENDDO
3018 ENDIF
3019 ENDIF
3020C =========================
3021 IF (keep(23) .NE. 0) THEN
3022C =========================
3023* maximum transversal was performed
3024 IF (mtype .NE. 1) THEN
3025* At x = b is asked while
3026* we have AP = LU where P is the column permutation
3027* due to max trans.
3028* Therefore we need to modify rhs:
3029* b' = P-1 b (P-1=Pt)
3030* Apply column permutation to the right hand side RHS
3031* Column J of the permuted matrix corresponds to
3032* column PERMW(J) of the original matrix.
3033*
3034C ==========
3035C SPARSE RHS : permute indices rather than values
3036C ==========
3037C Solve with At X = B should never occur for A-1
3038 ipos = 1
3039 DO i=1, nbcol_inbloc
3040C Note that: (i) IRHS_PTR_COPY is compressed;
3041C (ii) columns might have been permuted
3042 colsize = irhs_ptr_copy(i+1) - irhs_ptr_copy(i)
3043 DO k = 1, colsize
3044 jperm = uns_perm_inv(irhs_sparse_copy(ipos+k-1))
3045 irhs_sparse_copy(ipos+k-1) = jperm
3046 ENDDO
3047 ipos = ipos + colsize
3048 ENDDO
3049 ENDIF ! MTYPE.NE.1
3050 ENDIF ! KEEP(23).NE.0
3051 ENDIF ! NZ_THIS_BLOCK .NE. 0
3052C -----
3053 ENDIF ! ============ keep(248)==1
3054C -----
3055 ENDIF ! (id%MYID .eq. MASTER)
3056C
3057C ===================== ERROR handling and propagation ================
3058 30 CONTINUE
3059 CALL mumps_propinfo( icntl(1), info(1),
3060 & id%COMM,id%MYID)
3061 IF (info(1) .LT.0 ) GOTO 90
3062C ======================================================================
3063C
3064C NBCOL_INBLOC depends on loop
3065 IF (keep(248)==1) THEN
3066 CALL mpi_bcast( nbcol_inbloc,1, mpi_integer,
3067 & master, id%COMM,ierr)
3068 ELSE
3069 nbcol_inbloc = nbrhs_eff
3070 ENDIF
3071 jend_rhs =jbeg_rhs + nbcol_inbloc - 1
3072 IF ((keep(111).eq.0).AND.(keep(252).EQ.0)
3073 & .AND.(keep(221).NE.2 ).AND.(keep(248).EQ.1) ) THEN
3074C ----------------------------
3075C -- SPARSE RIGHT-HAND-SIDE
3076C ----------------------------
3077 CALL mpi_bcast( nz_this_block,1, mpi_integer,
3078 & master, id%COMM,ierr)
3079 IF (id%MYID.NE.master .and. nz_this_block.NE.0) THEN
3080 ALLOCATE(irhs_sparse_copy(nz_this_block),
3081 & stat=allocok)
3082 if (allocok .GT.0 ) then
3083 info(1)=-13
3084 info(2)=nz_this_block
3085 GOTO 45
3086 endif
3087 irhs_sparse_copy_allocated=.true.
3088C RHS_SPARSE_COPY is broadcasted
3089C for A-1 even if on the slaves the initialisation of the RHS
3090C could be only based on the pattern. Doing so we
3091C broadcast the scaled version of the RHS (scaling arrays
3092C that are not available on slaves).
3093 ALLOCATE(rhs_sparse_copy(nz_this_block),
3094 & stat=allocok)
3095 if (allocok .GT.0 ) then
3096 info(1)=-13
3097 info(2)=nz_this_block
3098 GOTO 45
3099 endif
3100 rhs_sparse_copy_allocated=.true.
3101 nb_bytes = nb_bytes + int(nz_this_block,8)*(k34_8+k35_8)
3102 nb_bytes_max = max(nb_bytes_max,nb_bytes)
3103C
3104 ALLOCATE(irhs_ptr_copy(nbcol_inbloc+1),stat=allocok)
3105 if (allocok .GT.0 ) then
3106 info(1)=-13
3107 info(2)=nbcol_inbloc+1
3108 GOTO 45
3109 endif
3110 irhs_ptr_copy_allocated = .true.
3111 nb_bytes = nb_bytes + int(nbcol_inbloc+1,8)*k34_8
3112 nb_bytes_max = max(nb_bytes_max,nb_bytes)
3113 ENDIF
3114C
3115C ===================== ERROR handling and propagation ================
3116 45 CONTINUE
3117 CALL mumps_propinfo( icntl(1), info(1),
3118 & id%COMM,id%MYID)
3119 IF (info(1) .LT.0 ) GOTO 90
3120C ======================================================================
3121 IF (nz_this_block > 0) THEN
3122 CALL mpi_bcast(irhs_sparse_copy(1),
3123 & nz_this_block,
3124 & mpi_integer,
3125 & master, id%COMM,ierr)
3126 CALL mpi_bcast(irhs_ptr_copy(1),
3127 & nbcol_inbloc+1,
3128 & mpi_integer,
3129 & master, id%COMM,ierr)
3130 IF (ierr.GT.0) THEN
3131 WRITE (*,*)'NOT OK FOR ALLOC PTR ON SLAVES'
3132 call mumps_abort()
3133 ENDIF
3134 ENDIF
3135 ENDIF
3136C
3137C =========================================================
3138C INITIALIZE POSINRHSCOMP_ROW/COL, RHSCOMP and related data
3139C For distributed RHS, initialize RHSMAPINFO (at 1st block)
3140C =========================================================
3141 IF ( i_am_slave ) THEN
3142C --------------------------------------------------
3143C If I am involved in the solve and if
3144C either
3145C no null space comput (keep(111)=0) and sparse rhs
3146C or
3147C null space computation
3148C then
3149C compute POSINRHSCOMP
3150C endif
3151C
3152C Fwd in facto: in this case only POSINRHSCOMP need be computed
3153C
3154C (POSINRHSCOMP_ROW/COL indirection arrays should
3155C have been allocated once outside loop)
3156C Compute size of RHSCOMP since it might depend
3157C on the process index and of the sparsity of the RHS
3158C if it is exploited.
3159C Initialize POSINRHSCOMP_ROW/COL
3160C
3161C Note that LD_RHSCOMP and id%KEEP8(25)
3162C are not set on the host in this routine in
3163C the case of a non-working host.
3164C Note that POSINRHSCOMP is now always computed in SOL_DRIVER
3165C at least during the first block of RHS when sparsity of RHS
3166C is not exploited.
3167C -------------------------------
3168C INITTIALZE POSINRHSCOMP_ROW/COL
3169C -------------------------------
3170C
3171 IF ( keep(221).EQ.2 .AND. keep(252).EQ.0
3172 & .AND. (keep(248).NE.1 .OR. (id%NRHS.EQ.1))
3173 & ) THEN
3174C Reduced RHS was already computed during
3175C a previous forward step AND is valid.
3176C By valid we mean:
3177C -no forward in facto (KEEP(252)==0) during which
3178C POSINRHSCOMP was not computed
3179C AND
3180C -no exploit sparsity with multiple RHS
3181C because in this case POSINRHSCOMP would
3182C be valid only for the last block processed during fwd.
3183C In those cases since we only perform the backward step, we do not
3184C need to compute POSINRHSCOMP
3185 build_posinrhscomp = .false.
3186 ENDIF
3187C ------------------------
3188C INITIALIZE POSINRHSCOMP
3189C ------------------------
3190 IF (build_posinrhscomp) THEN
3191C -- we first set MTYPE_LOC and
3192C -- reset BUILD_POSINRHSCOMP for next iteration in loop
3193C
3194C general case only POSINRHSCOMP is computed
3195 build_posinrhscomp = .false.
3196! POSINRHSCOMP does not change between blocks
3197 mtype_loc = mtype
3198C
3199 IF ( (keep(111).NE.0) .OR. (keep(237).NE.0) .OR.
3200 & (keep(252).NE.0) ) THEN
3201C
3202 IF (keep(111).NE.0) THEN
3203C -- in the context of null space, we need to
3204C -- build RHSCOMP to skip SOL_R. Therefore
3205C -- we need to know for each concerned
3206C -- row index its position in
3207C -- RHSCOMP
3208C We use row indices, as these are the ones that
3209C were used to detect zero pivots during factorization.
3210C POSINRHSCOMP_ROW will allow to find the (row) index of a
3211C zero in RHSCOMP before calling CMUMPS_SOL_S. Then
3212C CMUMPS_SOL_S uses column indices to build the solution
3213C (corresponding to null space vectors)
3214 mtype_loc = 1
3215 ELSE IF (keep(252).NE.0) THEN
3216C -- Fwd in facto: since fwd is skipped we need to build POSINRHSCOMP
3217 mtype_loc = 1 ! (no transpose)
3218C BUILD_POSINRHSCOMP = .FALSE. ! POSINRHSCOMP does not change between blocks
3219 ELSE
3220C -- A-1 only
3221 mtype_loc = mtype
3222 build_posinrhscomp = .true.
3223 ENDIF
3224 ENDIF
3225C -- compute POSINRHSCOMP
3226 liw_passed=max(1,liw)
3227 IF (keep(237).EQ.0) THEN
3229 & id%NSLAVES,id%N,
3230 & id%MYID_NODES, id%PTLUST_S(1),
3231 & id%KEEP(1),id%KEEP8(1),
3232 & id%PROCNODE_STEPS(1), id%IS(1), liw_passed,
3233 & id%STEP(1),
3234 & id%POSINRHSCOMP_ROW(1), id%POSINRHSCOMP_COL(1),
3235 & id%POSINRHSCOMP_COL_ALLOC,
3236 & mtype_loc,
3237 & nbent_rhscomp, nb_fs_rhscomp_tot )
3238 nb_fs_rhscomp_f = nb_fs_rhscomp_tot
3239 ELSE
3241 & id%NSLAVES,id%N,
3242 & id%MYID_NODES, id%PTLUST_S(1), id%DAD_STEPS(1),
3243 & id%KEEP(1),id%KEEP8(1),
3244 & id%PROCNODE_STEPS(1), id%IS(1), liw,
3245 & id%STEP(1),
3246 & id%POSINRHSCOMP_ROW(1), id%POSINRHSCOMP_COL(1),
3247 & id%POSINRHSCOMP_COL_ALLOC,
3248 & mtype_loc,
3249 & irhs_ptr_copy(1), nbcol_inbloc, irhs_sparse_copy(1),
3250 & nz_this_block,perm_rhs, size(perm_rhs) , jbeg_rhs,
3251 & nbent_rhscomp,
3252 & nb_fs_rhscomp_f, nb_fs_rhscomp_tot,
3253 & uns_perm_inv, size(uns_perm_inv) ! size 1 if not used
3254 & )
3255 ENDIF
3256 ENDIF ! BUILD_POSINRHSCOMP=.TRUE.
3257 IF (build_rhsmapinfo .AND. keep(248).EQ.-1) THEN
3258C
3259C Prepare symbolic data for sends.
3260C For the moment: MAP_RHS_loc
3261C
3262 CALL mumps_sol_rhsmapinfo( id%N, id%Nloc_RHS, id%KEEP(89),
3263 & irhs_loc_ptr(1), map_rhs_loc, id%POSINRHSCOMP_ROW(1),
3264 & id%NSLAVES, id%MYID_NODES,
3265 & id%COMM_NODES, id%ICNTL(1), id%INFO(1) )
3266 build_rhsmapinfo = .false.
3267C MUMPS_SOL_RHSMAPINFO does not propagate errors
3268 ENDIF
3269 ENDIF
3270 CALL mumps_propinfo( icntl(1), info(1),
3271 & id%COMM,id%MYID)
3272 IF (info(1) .LT.0 ) GOTO 90
3273 IF (i_am_slave) THEN
3274 IF (keep(221).EQ.1) THEN
3275C we need to save the reduced RHS for all RHS to perform
3276C later the backward phase with an updated reduced RHS
3277C thus we allocate NRHS_NONEMPTY columns in one shot.
3278C Note that RHSCOMP might have been allocated in previous block
3279C and RHSCOMP has been deallocated previous to entering loop on RHS
3280 IF (.not. associated(id%RHSCOMP)) THEN
3281C So far we cannot combine this to exploit sparsity
3282C so that NBENT_RHSCOMP will not change in the loop
3283C and can be used to dimension RHSCOMP
3284C Furthermore, during bwd phase the REDRHS provided
3285C by the user might also have a different non empty
3286C column pattern than the sparse RHS provided on input to
3287C this phase: thus we need to allocate id%NRHS columns too.
3288 ld_rhscomp = max(nbent_rhscomp,1)
3289 id%KEEP8(25) = int(ld_rhscomp,8)*int(id%NRHS,8)
3290 ALLOCATE (id%RHSCOMP(id%KEEP8(25)), stat = allocok)
3291 IF ( allocok .GT. 0 ) THEN
3292 info(1)=-13
3293 CALL mumps_set_ierror(id%KEEP8(25),info(2))
3294 id%KEEP8(25)=0_8
3295 GOTO 41
3296 END IF
3297 nb_bytes = nb_bytes + id%KEEP8(25)*k35_8
3298 nb_bytes_max = max(nb_bytes_max,nb_bytes)
3299 ENDIF
3300 ENDIF
3301 IF ((keep(221).NE.1).AND.
3302 & ((keep(221).NE.2).OR.(keep(252).NE.0))
3303 & ) THEN
3304C ------------------
3305C Allocate RHSCOMP (case of RHSCOMP allocated at each block of RHS)
3306C ------------------
3307C RHSCOMP allocated per block of maximum size NBRHS
3308 ld_rhscomp = max(nbent_rhscomp, ld_rhscomp)
3309C NBRHS_EFF could be used instead on NBRHS
3310 IF (associated(id%RHSCOMP)) THEN
3311 IF ( (id%KEEP8(25).LT.int(ld_rhscomp,8)*int(nbrhs,8))
3312 & .OR. (keep(235).NE.0).OR.(keep(237).NE.0) ) THEN
3313 ! deallocate and reallocate if:
3314 ! _larger array needed
3315 ! OR
3316 ! _exploit sparsity/A-1: since size of RHSCOMP
3317 ! is expected to vary much in these cases
3318 ! this should improve locality
3319 nb_bytes = nb_bytes - id%KEEP8(25)*k35_8
3320 DEALLOCATE(id%RHSCOMP)
3321 NULLIFY(id%RHSCOMP)
3322 id%KEEP8(25)=0_8
3323 ENDIF
3324 ENDIF
3325 IF (.not. associated(id%RHSCOMP)) THEN
3326 ld_rhscomp = max(nbent_rhscomp, 1)
3327 id%KEEP8(25) = int(ld_rhscomp,8)*int(nbrhs,8)
3328 ALLOCATE (id%RHSCOMP(id%KEEP8(25)), stat = allocok )
3329 IF ( allocok .GT. 0 ) THEN
3330 info(1)=-13
3331 CALL mumps_set_ierror(id%KEEP8(25),info(2))
3332 GOTO 41
3333 END IF
3334 nb_bytes = nb_bytes + id%KEEP8(25)*k35_8
3335 nb_bytes_max = max(nb_bytes_max,nb_bytes)
3336 ENDIF
3337 ENDIF
3338 IF (keep(221).EQ.2) THEN
3339C RHSCOMP has been allocated (call with KEEP(221).EQ.1)
3340C even in the case fwd in facto
3341 ! Not correct: LD_RHSCOMP = LENRHSCOMP/id%NRHS_NONEMPTY
3342 ld_rhscomp = int(id%KEEP8(25)/int(id%NRHS,8))
3343 ENDIF
3344C
3345C Shift on RHSCOMP
3346C
3347 IF ( keep(221).EQ.0 ) THEN
3348C -- RHSCOMP reused in the loop
3349 ibeg_rhscomp= 1_8
3350 ELSE
3351C Initialize IBEG_RHSCOMP
3352C
3353 ibeg_rhscomp= int(jbeg_rhs-1,8)*int(ld_rhscomp,8) + 1_8
3354 ENDIF
3355 ENDIF ! I_AM_SLAVE
3356C ===================== ERROR handling and propagation ================
3357 41 CONTINUE
3358 CALL mumps_propinfo( icntl(1), info(1),
3359 & id%COMM,id%MYID)
3360 IF (info(1) .LT.0 ) GOTO 90
3361C ======================================================================
3362C
3363C ---------------------------
3364C Prepare RHS on master (case
3365C of dense and sparse RHS)
3366C ---------------------------
3367 IF (id%MYID .eq. master) THEN
3368C =========================
3369 IF (keep(23) .NE. 0) THEN
3370C =========================
3371* maximum transversal was performed
3372 IF (mtype .NE. 1) THEN
3373* At x = b is asked while
3374* we have AP = LU where P is the column permutation
3375* due to max trans.
3376* Therefore we need to modify rhs:
3377* b' = P-1 b (P-1=Pt)
3378* Apply column permutation to the right hand side RHS
3379* Column J of the permuted matrix corresponds to
3380* column PERMW(J) of the original matrix.
3381*
3382 IF (keep(248)==0) THEN
3383C =========
3384C DENSE RHS : permute values in RHS
3385C =========
3386 ALLOCATE( c_rw2( id%N ),stat =allocok )
3387 IF ( allocok .GT. 0 ) THEN
3388 info(1)=-13
3389 info(2)=id%N
3390 IF (lpok) THEN
3391 WRITE(lp,*) id%MYID,
3392 & ':Error allocating C_RW2 in CMUMPS_SOLVE_DRIVE'
3393 END IF
3394 GOTO 30
3395 END IF
3396C We directly permute in id%RHS.
3397 DO k = 1, nbrhs_eff
3398 kdec = ibeg+int(k-1,8)*int(ld_rhs,8)
3399 DO i = 1, id%N
3400 c_rw2(i)=id%RHS(i-1+kdec)
3401 END DO
3402 DO i = 1, id%N
3403 jperm = id%UNS_PERM(i)
3404 id%RHS(i-1+kdec) = c_rw2(jperm)
3405 END DO
3406 END DO
3407 DEALLOCATE(c_rw2)
3408 ENDIF
3409 ENDIF
3410 ENDIF
3411C
3412 IF (postpros) THEN
3413 IF ( keep(248) == 0 ) THEN
3414 DO k = 1, nbrhs_eff
3415 kdec = ibeg+int(k-1,8)*int(ld_rhs,8)
3416 DO i = 1, id%N
3417 saverhs(i+(k-1)*id%N) = id%RHS(kdec+i-1)
3418 END DO
3419 ENDDO
3420 ELSE IF (keep(248)==1) THEN
3421 saverhs(:) = zero
3422 DO k = 1, nbrhs
3423 DO j = id%IRHS_PTR(k), id%IRHS_PTR(k+1)-1
3424 i = id%IRHS_SPARSE(j)
3425 saverhs(i+(k-1)*id%N) = id%RHS_SPARSE(j)
3426 ENDDO
3427 ENDDO
3428 ENDIF
3429 ENDIF
3430C
3431C RHS is set to scaled right hand side
3432C
3433 IF (lscal) THEN
3434C scaling was performed
3435 IF (keep(248)==0) THEN
3436C dense RHS
3437 IF (mtype .EQ. 1) THEN
3438C we solve Ax=b, use ROWSCA to scale the RHS
3439 DO k =1, nbrhs_eff
3440 kdec = int(k-1,8) * int(ld_rhs,8) + int(ibeg-1,8)
3441 DO i = 1, id%N
3442 id%RHS(kdec+i) = id%RHS(kdec+i) *
3443 & id%ROWSCA(i)
3444 ENDDO
3445 ENDDO
3446 ELSE
3447C we solve Atx=b, use COLSCA to scale the RHS
3448 DO k =1, nbrhs_eff
3449 kdec = int(k-1,8) * int(ld_rhs,8) + int(ibeg-1,8)
3450 DO i = 1, id%N
3451 id%RHS(kdec+i) = id%RHS(kdec+i) *
3452 & id%COLSCA(i)
3453 ENDDO
3454 ENDDO
3455 ENDIF
3456 ELSE IF (keep(248)==1) THEN
3457C -------------------------
3458C KEEP(248)==1 (and MASTER)
3459C -------------------------
3460 kdec=int(id%IRHS_PTR(jbeg_rhs),8)
3461C Compute
3462 IF ((keep(248)==1) .AND.
3463 & (do_permute_rhs.OR.interleave_par.OR.
3464 & (id%KEEP(237).NE.0))
3465 & ) THEN
3466C -- copy from RHS_SPARSE need be done per
3467C column following PERM_RHS
3468C Columns are not contiguous and need be copied one by one
3469 ipos = 1
3470 j = 0
3471 DO i=jbeg_rhs, jbeg_rhs + nbcol_inbloc -1
3472 IF (do_permute_rhs.OR.interleave_par) THEN
3473 iperm = perm_rhs(i)
3474 ENDIF
3475 j = j+1
3476C Note that we work here on compressed IRHS_PTR_COPY
3477 colsize = irhs_ptr_copy(j+1) - irhs_ptr_copy(j)
3478C -- skip empty column
3479 IF (colsize .EQ. 0) cycle
3480 IF (id%KEEP(237).NE.0) THEN
3481 IF (do_permute_rhs.OR.interleave_par) THEN
3482C if A-1 only, then, for each non empty target
3483C column PERM_RHS(I), scale in first position
3484C in column the diagonal entry
3485C build the scaled rhs ej on each slave.
3486 rhs_sparse_copy(ipos) = id%ROWSCA(iperm) *
3487 & one
3488 ELSE
3489 rhs_sparse_copy(ipos) = id%ROWSCA(i) * one
3490 ENDIF
3491 ELSE
3492C Loop over nonzeros in column
3493 DO k = 1, colsize
3494C Formula for II below is ok, except in case
3495C of maximum transversal (KEEP(23).NE.0) and
3496C transpose system (MTYPE .NE. 1):
3497C II = id%IRHS_SPARSE(id%IRHS_PTR(PERM_RHS(I))+K-1)
3498C In case of maximum transversal + transpose, one
3499C should then apply II=UNS_PERM_INV(II) after the
3500C above definition of II.
3501C
3502C Instead, we rely on IRHS_SPARSE_COPY, whose row
3503C indices have already been permuted in case of
3504C maximum transversal.
3505 ii = irhs_sparse_copy(
3506 & irhs_ptr_copy(i-jbeg_rhs+1)
3507 & +k-1)
3508C PERM_RHS(I) corresponds to column in original RHS.
3509C Original IRHS_PTR must be used to access id%RHS_SPARSE
3510 IF (mtype.EQ.1) THEN
3511 rhs_sparse_copy(ipos+k-1) =
3512 & id%RHS_SPARSE(id%IRHS_PTR(iperm)+k-1)*
3513 & id%ROWSCA(ii)
3514 ELSE
3515 rhs_sparse_copy(ipos+k-1) =
3516 & id%RHS_SPARSE(id%IRHS_PTR(iperm)+k-1)*
3517 & id%COLSCA(ii)
3518 ENDIF
3519 ENDDO
3520 ENDIF
3521 ipos = ipos + colsize
3522 ENDDO
3523 ELSE
3524 ! general sparse RHS
3525 ! without permutation
3526 IF (mtype .eq. 1) THEN
3527 DO iz=1,nz_this_block
3528 i=irhs_sparse_copy(iz)
3529 rhs_sparse_copy(iz)=id%RHS_SPARSE(kdec+iz-1)*
3530 & id%ROWSCA(i)
3531 ENDDO
3532 ELSE
3533 DO iz=1,nz_this_block
3534 i=irhs_sparse_copy(iz)
3535 rhs_sparse_copy(iz)=id%RHS_SPARSE(kdec+iz-1)*
3536 & id%COLSCA(i)
3537 ENDDO
3538 ENDIF
3539 ENDIF
3540 ENDIF ! KEEP(248)==1
3541 ENDIF ! LSCAL
3542 ENDIF ! id%MYID.EQ.MASTER
3543#if defined(V_T)
3544 CALL vtend(perm_scal_ini,ierr)
3545#endif
3546C
3547C Prepare RHS on master
3548C END
3549C =====================
3550 IF ((keep(248).EQ.1).AND.(keep(237).EQ.0)) THEN
3551 ! case of general sparse: in case of empty columns
3552 ! modifed version of
3553 ! NBRHS_EFF need be broadcasted since it is used
3554 ! to update BEG_RHS at the end of the DO WHILE
3555 CALL mpi_bcast( nbrhs_eff,1, mpi_integer,
3556 & master, id%COMM,ierr)
3557 CALL mpi_bcast(nb_rhsskipped,1,mpi_integer,master,
3558 & id%COMM,ierr)
3559 ENDIF
3560C -----------------------------------
3561C Two main cases depending on option
3562C for null space computation:
3563C
3564C KEEP(111)=0 : use RHS from user
3565C (sparse or dense)
3566C KEEP(111)!=0: build an RHS on each
3567C proc for null space
3568C computations
3569C -----------------------------------
3570#if defined(V_T)
3571 CALL vtbegin(soln_dist,ierr)
3572#endif
3573 timescatter1=mpi_wtime()
3574 IF ((keep(111).eq.0).AND.(keep(252).EQ.0)
3575 & .AND.(keep(221).NE.2 )) THEN
3576C ------------------------
3577C Use RHS provided by user
3578C when not null space and not Fwd in facto
3579C ------------------------
3580 IF (keep(248) == 0) THEN
3581C ----------------------------
3582C -- DENSE RIGHT-HAND-SIDE
3583C ----------------------------
3584 IF ( .NOT.i_am_slave ) THEN
3585C -- Master not working
3586 CALL cmumps_scatter_rhs(id%NSLAVES,id%N, id%MYID,
3587 & id%COMM,
3588 & mtype, id%RHS(ibeg), ld_rhs, nbrhs_eff,
3589 & nbrhs_eff,
3590 & c_dummy, 1, 1,
3591 & idummy, 0,
3592 & jdummy, id%KEEP(1), id%KEEP8(1), id%PROCNODE_STEPS(1),
3593 & idummy, 1,
3594 & id%STEP(1),
3595 & id%ICNTL(1),id%INFO(1))
3596 ELSE
3597 IF (id%MYID .eq. master) THEN
3598 ptr_rhs => id%RHS
3599 ld_rhs_loc = ld_rhs
3600 ncol_rhs_loc = nbrhs_eff
3601 ibeg_loc = ibeg
3602 ELSE
3603 ptr_rhs => cdummy_target
3604 ld_rhs_loc = 1
3605 ncol_rhs_loc = 1
3606 ibeg_loc = 1_8
3607 ENDIF
3608 liw_passed = max( liw, 1 )
3609 CALL cmumps_scatter_rhs(id%NSLAVES,id%N, id%MYID,
3610 & id%COMM,
3611 & mtype, ptr_rhs(ibeg_loc),ld_rhs_loc,ncol_rhs_loc,
3612 & nbrhs_eff,
3613 & id%RHSCOMP(ibeg_rhscomp), ld_rhscomp, nbrhs_eff,
3614 & id%POSINRHSCOMP_ROW(1), nb_fs_rhscomp_f,
3615C
3616 & id%PTLUST_S(1), id%KEEP(1), id%KEEP8(1),
3617 & id%PROCNODE_STEPS(1),
3618 & is(1), liw_passed,
3619 & id%STEP(1),
3620 & id%ICNTL(1),id%INFO(1))
3621 ENDIF
3622 IF (info(1).LT.0) GOTO 90
3623 ELSE IF (keep(248) .EQ. -1) THEN
3624 IF (i_am_slave) THEN
3625 IF (id%Nloc_RHS .NE. 0) THEN
3626 rhs_loc_size=int(id%LRHS_loc,8)*int(nbrhs_eff-1,8)+
3627 & int(id%Nloc_RHS,8)
3628 rhs_loc_shift=1_8+int(beg_rhs-1,8)*id%LRHS_loc
3629 ELSE
3630 rhs_loc_size=1_8
3631 rhs_loc_shift=1_8
3632 ENDIF
3633 CALL cmumps_scatter_dist_rhs(id%NSLAVES, id%N,
3634 & id%MYID_NODES, id%COMM_NODES,
3635 & nbrhs_eff, id%Nloc_RHS, id%LRHS_loc,
3636 & map_rhs_loc,
3637 & irhs_loc_ptr(1),
3638 & idrhs_loc(rhs_loc_shift),
3639 & rhs_loc_size,
3640 & id%RHSCOMP(ibeg_rhscomp), ld_rhscomp,
3641 & id%POSINRHSCOMP_ROW(1), nb_fs_rhscomp_f,
3642 & lscal, scaling_data_dr,
3643 & lp, lpok, keep(1), nb_bytes_loc, info(1))
3644C NB_BYTES_LOC were allocated and freed above
3645 nb_bytes_max = max(nb_bytes_max,
3646 & nb_bytes_max+nb_bytes_loc)
3647 ENDIF
3648 CALL mumps_propinfo( icntl(1), info(1),
3649 & id%COMM,id%MYID)
3650 IF (info(1).LT.0) GOTO 90
3651 ELSE
3652C === KEEP(248)==1 =========
3653C -- SPARSE RIGHT-HAND-SIDE
3654C ----------------------------
3655 IF (nz_this_block > 0) THEN
3656 CALL mpi_bcast(rhs_sparse_copy(1),
3657 & nz_this_block,
3658 & mpi_complex,
3659 & master, id%COMM, ierr)
3660 ENDIF
3661C -- At this point each process has a copy of the
3662C -- sparse RHS. We need to store it into RHSCOMP.
3663C
3664 IF (keep(237).NE.0) THEN
3665 IF ( i_am_slave ) THEN
3666C -----
3667C case of A-1
3668C -----
3669C - Take columns with non-zero entry, say j,
3670C - to build Ej and store it in RHSCOMP
3671 k=1 ! Column index in RHSCOMP
3672 id%RHSCOMP(1_8:int(nbrhs_eff,8)*int(ld_rhscomp,8))
3673 & = zero
3674 ipos = 1
3675 DO i = 1, nbcol_inbloc
3676 colsize = irhs_ptr_copy(i+1) - irhs_ptr_copy(i)
3677 IF (colsize.GT.0) THEN
3678 ! Find global column index J and set
3679 ! column K of RHSCOMP to ej (here IBEG is one)
3680 j = i - 1 + jbeg_rhs
3681 IF (do_permute_rhs.OR.interleave_par) THEN
3682 j = perm_rhs(j)
3683 ENDIF
3684 iposrhscomp = id%POSINRHSCOMP_ROW(j)
3685C IF ( (IPOSRHSCOMP.LE.NB_FS_RHSCOMP_F)
3686C & .AND.(IPOSRHSCOMP.GT.0) ) THEN
3687 IF (iposrhscomp.GT.0) THEN
3688C Columns J corresponds to ej and thus to variable j
3689C that is on my proc
3690C Note that :
3691C In first entry in column
3692C we have and MUST have already scaled value of diagonal.
3693C This need have been done on master because we do not
3694C have scaling arrays available on slaves.
3695C Furthermore we know that only one entry is
3696C needed the diagonal entry (for the forward with A-1).
3697C
3698 id%RHSCOMP(int(k-1,8)*int(ld_rhscomp,8)+
3699 & int(iposrhscomp,8)) =
3700 & rhs_sparse_copy(ipos)
3701 ENDIF ! End of J on my proc
3702 k = k + 1
3703 ipos = ipos + colsize ! go to next column
3704 ENDIF
3705 ENDDO
3706 IF (k.NE.nbrhs_eff+1) THEN
3707 WRITE(6,*) 'Internal Error 9 in solution driver ',
3708 & k,nbrhs_eff
3709 call mumps_abort()
3710 ENDIF
3711 ENDIF ! I_AM_SLAVE
3712C -------
3713c END A-1
3714C -------
3715 ELSE
3716C --------------
3717C General sparse
3718C --------------
3719C -- reset to zero RHSCOMP for skipped columns (if any)
3720 IF ((keep(221).EQ.1).AND.(nb_rhsskipped.GT.0)
3721 & .AND.i_am_slave) THEN
3722 DO k = jbeg_rhs-nb_rhsskipped, jbeg_rhs-1
3723 DO i = 1, ld_rhscomp
3724 id%RHSCOMP(int(k-1,8)*int(ld_rhscomp,8)
3725 & + int(i,8)) = zero
3726 ENDDO
3727 ENDDO
3728 ENDIF
3729 IF (i_am_slave) THEN
3730 DO k = 1, nbcol_inbloc
3731! it is equal to NBRHS_EFF in this case
3732 kdec = int(k-1,8) * int(ld_rhscomp,8) +
3733 & ibeg_rhscomp - 1_8
3734 id%RHSCOMP(kdec+1_8:kdec+nbent_rhscomp) = zero
3735 DO iz=irhs_ptr_copy(k), irhs_ptr_copy(k+1)-1
3736 i=irhs_sparse_copy(iz)
3737 iposrhscomp = id%POSINRHSCOMP_ROW(i)
3738C Since all fully summed variables mapped
3739C on each proc are stored at the beginning
3740C of RHSCOMP, we can compare to KEEP(89)
3741C to know if RHSCOMP should be initialized
3742C So far the tree has not been pruned to exploit
3743C sparsity to compress RHSCOMP so we compare to
3744C NB_FS_RHSCOMP_TOT
3745 IF ( (iposrhscomp.LE.nb_fs_rhscomp_tot)
3746 & .AND.(iposrhscomp.GT.0) ) THEN
3747C ! I is fully summed var mapped on my proc
3748 id%RHSCOMP(kdec+iposrhscomp)=
3749 & id%RHSCOMP(kdec+iposrhscomp) +
3750 & rhs_sparse_copy(iz)
3751 ENDIF
3752 ENDDO
3753 ENDDO
3754 END IF ! I_AM_SLAVE
3755 ENDIF ! KEEP(237)
3756 ENDIF ! ==== KEEP(248)==1 =====
3757C
3758 ELSE IF (i_am_slave) THEN
3759 ! I_AM_SLAVE AND (null space or Fwd in facto)
3760 IF (keep(111).NE.0) THEN
3761C -----------------------
3762C Null space computations
3763C -----------------------
3764C
3765C We are working on columns BEG_RHS:BEG_RHS+NBRHS_EFF-1
3766C of RHS.
3767C Columns in 1..KEEP(112):
3768C Put a one in corresponding
3769C position of the right-hand-side,
3770C and zeros in other places.
3771C Columns in KEEP(112)+1: KEEP(112)+KEEP(17):
3772C root node => set
3773C 0 everywhere and compute the local range
3774C corresponding to IBEG/IEND in root
3775C that will be passed to CMUMPS_SEQ_SOLVE_ROOT_RR
3776C Also keep track of which part of
3777C CMUMPS_RHS must be passed to
3778C CMUMPS_SEQ_SOLVE_ROOT_RR.
3779C
3780 IF (keep(111).GT.0) THEN
3781 ibeg_glob_def = keep(111)
3782 iend_glob_def = keep(111)
3783 ELSE
3784 ibeg_glob_def = beg_rhs
3785 iend_glob_def = beg_rhs+nbrhs_eff-1
3786 ENDIF
3787 IF ( id%KEEP(112) .GT. 0 .AND. do_null_piv) THEN
3788 IF (ibeg_glob_def .GT.id%KEEP(112)) THEN
3789 id%KEEP(235) = 0
3790 do_null_piv = .false.
3791 ENDIF
3792 IF (ibeg_glob_def .LT.id%KEEP(112)
3793 & .AND. iend_glob_def .GT.id%KEEP(112)
3794 & .AND. do_null_piv ) THEN
3795C IEND_GLOB_DEF = id%KEEP(112)
3796C forcing exploit sparsity
3797C - cannot be done at this point
3798C - and is not what the user would have expected the
3799C code to to do anyway !!!!
3800C suppress: id%KEEP(235) = 1 ! End Block of sparsity ON
3801 do_null_piv = .false.
3802 ENDIF
3803 ENDIF
3804 IF (id%KEEP(235).NE.0) THEN
3805C Exploit Sparsity in null space computations
3806C We build /allocate the sparse RHS on MASTER
3807C based on pivnul_list. Then we broadcast it
3808C on the slaves
3809C In this case we have ONLY ONE ENTRY per RHS
3810C
3811 nz_this_block=iend_glob_def-ibeg_glob_def+1
3812 ALLOCATE(irhs_ptr_copy(nz_this_block+1),stat=allocok)
3813 IF (allocok .GT.0 ) THEN
3814 info(1)=-13
3815 info(2)=nz_this_block
3816 GOTO 50
3817 ENDIF
3818 irhs_ptr_copy_allocated = .true.
3819 ALLOCATE(irhs_sparse_copy(nz_this_block),stat=allocok)
3820 IF (allocok .GT.0 ) THEN
3821 info(1)=-13
3822 info(2)=nz_this_block
3823 GOTO 50
3824 ENDIF
3825 irhs_sparse_copy_allocated=.true.
3826 nb_bytes = nb_bytes +
3827 & int(nz_this_block,8)*(k34_8+k34_8)
3828 & + k34_8
3829 nb_bytes_max = max(nb_bytes_max,nb_bytes)
3830 IF (id%MYID.eq.master) THEN
3831 ! compute IRHS_PTR and IRHS_SPARSE_COPY
3832 ii = 1
3833 DO i = ibeg_glob_def, iend_glob_def
3834 irhs_ptr_copy(i-ibeg_glob_def+1) = i
3835 irhs_sparse_copy(ii) = id%PIVNUL_LIST(i)
3836 ii = ii +1
3837 ENDDO
3838 irhs_ptr_copy(nz_this_block+1) = nz_this_block+1
3839 ENDIF
3840C
3841C ===================== ERROR handling and propagation ================
3842 50 CONTINUE
3843 CALL mumps_propinfo( icntl(1), info(1),
3844 & id%COMM,id%MYID)
3845 IF (info(1) .LT.0 ) GOTO 90
3846C ======================================================================
3847 CALL mpi_bcast(irhs_sparse_copy(1),
3848 & nz_this_block,
3849 & mpi_integer,
3850 & master, id%COMM,ierr)
3851 CALL mpi_bcast(irhs_ptr_copy(1),
3852 & nz_this_block+1,
3853 & mpi_integer,
3854 & master, id%COMM,ierr)
3855C End IF Exploit Sparsity
3856 ENDIF
3857c
3858C Initialize RHSCOMP to 0 ! to be suppressed
3859 DO k=1, nbrhs_eff
3860 kdec = int(k-1,8) * int(ld_rhscomp,8)
3861 id%RHSCOMP(kdec+1_8:kdec+int(ld_rhscomp,8))=zero
3862 END DO
3863C Loop over the columns.
3864C Note that if ( KEEP(220)+KEEP(109)-1 < IBEG_GLOB_DEF
3865C .OR. KEEP(220) > IEND_GLOB_DEF ) then we do not enter
3866C the loop.
3867C Note that local processor has indices
3868C KEEP(220):KEEP(220)+KEEP(109)-1
3869C
3870C Computation of null space and computation of backward
3871C step incompatible, do one or the other.
3872 DO i=max(ibeg_glob_def,keep(220)),
3873 & min(iend_glob_def,keep(220)+keep(109)-1)
3874C Local processor is concerned by I-th column of
3875C global right-hand side.
3876 jj= id%POSINRHSCOMP_ROW(id%PIVNUL_LIST(i-keep(220)+1))
3877 IF (jj.GT.0) THEN
3878 IF (keep(50).EQ.0) THEN
3879 ! unsymmetric : always set to fixation
3880 id%RHSCOMP( ibeg_rhscomp+
3881 & int(i-ibeg_glob_def,8)*int(ld_rhscomp,8) +
3882 & int(jj-1,8) ) =
3883 & cmplx(id%DKEEP(2),kind=kind(id%RHSCOMP))
3884 ELSE
3885 ! Symmetric: always set to one
3886 id%RHSCOMP( ibeg_rhscomp+
3887 & int(i-ibeg_glob_def,8)*int(ld_rhscomp,8)+
3888 & int(jj-1,8) )=
3889 & one
3890 ENDIF
3891 ENDIF
3892 ENDDO
3893 IF ( keep(17).NE.0 .AND.
3894 & id%MYID_NODES.EQ.master_root) THEN
3895C ---------------------------
3896C Deficiency of the root node
3897C Find range relative to root
3898C ---------------------------
3899C Among IBEG_GLOB_DEF:IEND_GLOB_DEF, find
3900C intersection with KEEP(112)+1:KEEP(112)+KEEP(17)
3901 ibeg_root_def = max(ibeg_glob_def,keep(112)+1)
3902 iend_root_def = min(iend_glob_def,keep(112)+keep(17))
3903C First column of right-hand side that must
3904C be passed to CMUMPS_SEQ_SOLVE_ROOT_RR is:
3905 iroot_def_rhs_col1 = ibeg_root_def-ibeg_glob_def + 1
3906C We look for indices relatively to the root node,
3907C substract number of null pivots outside root node
3908 ibeg_root_def = ibeg_root_def-keep(112)
3909 iend_root_def = iend_root_def-keep(112)
3910C Note that if IBEG_ROOT_DEF > IEND_ROOT_DEF, then this
3911C means that nothing must be done on the root node
3912C for this set of right-hand sides.
3913 ELSE
3914 ibeg_root_def = -90999
3915 iend_root_def = -95999
3916 iroot_def_rhs_col1= 1
3917 ENDIF
3918 ELSE ! End of null space (test on KEEP(111))
3919C case of Fwd in facto
3920C id%RHSCOMP need not be initialized. It will be set on the fly
3921C to zero for normal fully summed variables of the fronts and
3922C to -1 on the roots for the id%N+KEEP(253) variables added
3923C to the roots.
3924 ENDIF ! End of null space (test on KEEP(111))
3925 ENDIF ! I am slave
3926 timescatter2=mpi_wtime()-timescatter1+timescatter2
3927C -------------------------------------------
3928C Reserve space at the end of WORK_WCB on the
3929C master of the root node. It will be used to
3930C store the reduced RHS.
3931C -------------------------------------------
3932 IF ( i_am_slave ) THEN
3933 lwcb8_sol_c = lwcb8
3934 IF ( id%MYID_NODES .EQ. master_root ) THEN
3935C This is a special root (otherwise MASTER_ROOT < 0)
3936 IF ( associated(id%root%RHS_CNTR_MASTER_ROOT) ) THEN
3937C RHS_CNTR_MASTER_ROOT may have been allocated
3938C during the factorization phase.
3939 ptr_rhs_root => id%root%RHS_CNTR_MASTER_ROOT
3940# if defined(MUMPS_F2003)
3941 lptr_rhs_root = size(id%root%RHS_CNTR_MASTER_ROOT,kind=8)
3942# else
3943 lptr_rhs_root = int(size(id%root%RHS_CNTR_MASTER_ROOT),8)
3944# endif
3945 ELSE
3946C Otherwise, we use workspace in WCB
3947 lptr_rhs_root = int(nbrhs_eff,8) * int(size_root,8)
3948 ipt_rhs_root = lwcb8 - lptr_rhs_root + 1_8
3949 ptr_rhs_root => work_wcb(ipt_rhs_root:lwcb8)
3950 lwcb8_sol_c = lwcb8_sol_c - lptr_rhs_root
3951 ENDIF
3952 ELSE
3953 lptr_rhs_root = 1_8
3954 ipt_rhs_root = lwcb8 ! Will be passed, but not accessed
3955 ptr_rhs_root => work_wcb(ipt_rhs_root:lwcb8)
3956 lwcb8_sol_c = lwcb8_sol_c - lptr_rhs_root
3957 ENDIF
3958 ENDIF
3959 IF (keep(221) .EQ. 2 ) THEN
3960C Copy/send REDRHS in PTR_RHS_ROOT
3961C (column by column if leading dimension LD_REDRHS
3962C of REDRHS is not equal to SIZE_ROOT).
3963C REDRHS was provided on the host
3964 IF ( ( id%MYID .EQ. master_root_in_comm ) .AND.
3965 & ( id%MYID .EQ. master ) ) THEN
3966C -- Same proc : copy is possible:
3967 ii = 0
3968 DO k=1, nbrhs_eff
3969 kdec = ibeg_redrhs+int(k-1,8)*int(ld_redrhs,8)-1_8
3970 DO i = 1, size_root
3971 ptr_rhs_root(ii+i) = id%REDRHS(kdec+i)
3972 ENDDO
3973 ii = ii+size_root
3974 ENDDO
3975 ELSE
3976C -- send REDRHS
3977 IF ( id%MYID .EQ. master) THEN
3978C -- send to MASTER_ROOT_IN_COMM using COMM communicator
3979C assert: id%KEEP(116).EQ.SIZE_ROOT
3980 IF (ld_redrhs.EQ.size_root) THEN
3981C -- One send
3982 kdec = ibeg_redrhs
3983 CALL mpi_send(id%REDRHS(kdec),
3984 & size_root*nbrhs_eff,
3985 & mpi_complex,
3986 & master_root_in_comm, 0, id%COMM,ierr)
3987 ELSE
3988C -- NBRHS_EFF sends
3989 DO k=1, nbrhs_eff
3990 kdec = ibeg_redrhs+int(k-1,8)*int(ld_redrhs,8)
3991 CALL mpi_send(id%REDRHS(kdec),size_root,
3992 & mpi_complex,
3993 & master_root_in_comm, 0, id%COMM,ierr)
3994 ENDDO
3995 ENDIF
3996 ELSE IF ( id%MYID .EQ. master_root_in_comm ) THEN
3997C -- receive from MASTER
3998 ii = 1
3999 IF (ld_redrhs.EQ.size_root) THEN
4000C -- receive all in on shot
4001 CALL mpi_recv(ptr_rhs_root(ii),
4002 & size_root*nbrhs_eff,
4003 & mpi_complex,
4004 & master, 0, id%COMM,status,ierr)
4005 ELSE
4006 DO k=1, nbrhs_eff
4007 CALL mpi_recv(ptr_rhs_root(ii),size_root,
4008 & mpi_complex,
4009 & master, 0, id%COMM,status,ierr)
4010 ii = ii + size_root
4011 ENDDO
4012 ENDIF
4013 ENDIF
4014C -- other procs are not concerned
4015 ENDIF
4016 ENDIF
4017 timec1=mpi_wtime()
4018 IF ( i_am_slave ) THEN
4019 liw_passed = max( liw, 1 )
4020 la_passed = max( la, 1_8 )
4021C
4022 IF ((id%KEEP(235).EQ.0).and.(id%KEEP(237).EQ.0) ) THEN
4023C
4024C --- Normal case : we do not exploit sparsity of the RHS
4025C
4026 from_pp = .false.
4027 nbsparse_loc = (do_nbsparse.AND.nbrhs_eff.GT.1)
4028 pruned_size_loaded = 0_8 ! From CMUMPS_SOL_ES module
4029 CALL cmumps_sol_c(id%root, id%N, id%S(1), la_passed, is(1),
4030 & liw_passed, work_wcb(1), lwcb8_sol_c, iwcb, liwcb, nbrhs_eff,
4031 & id%NA(1),id%LNA,id%NE_STEPS(1), srw3, mtype, icntl(1), from_pp,
4032 & id%STEP(1), id%FRERE_STEPS(1), id%DAD_STEPS(1), id%FILS(1),
4033 & id%PTLUST_S(1), id%PTRFAC(1), iwk_solve, liwk_solve, ptracb,
4034 & liwk_ptracb, id%PROCNODE_STEPS(1), id%NSLAVES, info(1),keep(1),
4035 & keep8(1), id%DKEEP(1), id%COMM_NODES, id%MYID, id%MYID_NODES,
4036 & bufr(1), lbufr, lbufr_bytes, id%ISTEP_TO_INIV2(1),
4037 & id%TAB_POS_IN_PERE(1,1), ibeg_root_def, iend_root_def,
4038 & iroot_def_rhs_col1, ptr_rhs_root(1), lptr_rhs_root, size_root,
4039 & master_root, id%RHSCOMP(ibeg_rhscomp), ld_rhscomp,
4040 & id%POSINRHSCOMP_ROW(1), id%POSINRHSCOMP_COL(1)
4041 & , 1, 1, 1, 1, idummy, 1, jdummy, kdummy, 1, ldummy, 1, mdummy
4042 & , 1, 1, nbsparse_loc, ptr_rhs_bounds(1), lptr_rhs_bounds
4043 & , id%IPOOL_B_L0_OMP(1), id%LPOOL_B_L0_OMP, id%IPOOL_A_L0_OMP(1),
4044 & id%LPOOL_A_L0_OMP, id%L_VIRT_L0_OMP, id%VIRT_L0_OMP(1),
4045 & id%L_PHYS_L0_OMP, id%PHYS_L0_OMP(1), id%PERM_L0_OMP(1),
4046 & id%PTR_LEAFS_L0_OMP(1), id%L0_OMP_MAPPING(1), id%LL0_OMP_MAPPING,
4047 & id%L0_OMP_FACTORS(1), id%LL0_OMP_FACTORS
4048 & )
4049 ELSE
4050C Exploit sparsity of the RHS (all cases)
4051C Remark that JBEG_RHS is already initialized
4052C
4053 from_pp = .false.
4054 nbsparse_loc = (do_nbsparse.AND.nbrhs_eff.GT.1)
4055 CALL cmumps_sol_c(id%root, id%N, id%S(1), la_passed,is(1),
4056 & liw_passed,work_wcb(1),lwcb8_sol_c,iwcb,liwcb,nbrhs_eff,id%NA(1),
4057 & id%LNA,id%NE_STEPS(1),srw3,mtype,icntl(1),from_pp,id%STEP(1),
4058 & id%FRERE_STEPS(1), id%DAD_STEPS(1), id%FILS(1), id%PTLUST_S(1),
4059 & id%PTRFAC(1), iwk_solve, liwk_solve, ptracb, liwk_ptracb,
4060 & id%PROCNODE_STEPS(1),id%NSLAVES,info(1),keep(1), keep8(1),
4061 & id%DKEEP(1),id%COMM_NODES,id%MYID,id%MYID_NODES,bufr(1),lbufr,
4062 & lbufr_bytes, id%ISTEP_TO_INIV2(1), id%TAB_POS_IN_PERE(1,1),
4063 & ibeg_root_def,iend_root_def,iroot_def_rhs_col1,ptr_rhs_root(1),
4064 & lptr_rhs_root, size_root, master_root, id%RHSCOMP(ibeg_rhscomp),
4065 & ld_rhscomp, id%POSINRHSCOMP_ROW(1), id%POSINRHSCOMP_COL(1),
4066 & nz_this_block, nbcol_inbloc, id%NRHS, jbeg_rhs, id%Step2node(1),
4067 & id%KEEP(28),irhs_sparse_copy(1),irhs_ptr_copy(1), size(perm_rhs),
4068 & perm_rhs, size(uns_perm_inv), uns_perm_inv, nb_fs_rhscomp_f,
4069 & nb_fs_rhscomp_tot,nbsparse_loc,ptr_rhs_bounds(1),lptr_rhs_bounds
4070 & ,id%IPOOL_B_L0_OMP(1),id%LPOOL_B_L0_OMP,id%IPOOL_A_L0_OMP(1),
4071 & id%LPOOL_A_L0_OMP,id%L_VIRT_L0_OMP,id%VIRT_L0_OMP(1),
4072 & id%L_PHYS_L0_OMP, id%PHYS_L0_OMP(1), id%PERM_L0_OMP(1),
4073 & id%PTR_LEAFS_L0_OMP(1), id%L0_OMP_MAPPING(1), id%LL0_OMP_MAPPING,
4074 & id%L0_OMP_FACTORS(1), id%LL0_OMP_FACTORS )
4075 ENDIF ! end of exploit sparsity (pruning nodes of the tree)
4076 END IF
4077C -----------------
4078C End of slave code
4079C -----------------
4080C
4081C
4082C Propagate errors
4083 CALL mumps_propinfo( icntl(1), info(1),
4084 & id%COMM,id%MYID)
4085 timec2=mpi_wtime()-timec1+timec2
4086C
4087C Change error code.
4088 IF (info(1).eq.-2) then
4089 info(1)=-11
4090 IF (lpok)
4091 & write(lp,*)
4092 & ' WARNING : -11 error code obtained in solve'
4093 END IF
4094 IF (info(1).eq.-3) then
4095 info(1)=-14
4096 IF (lpok)
4097 & write(lp,*)
4098 & ' WARNING : -14 error code obtained in solve'
4099 END IF
4100C
4101C Return in case of error.
4102 IF (info(1).LT.0) GO TO 90
4103C
4104C ======================================================
4105C ONLY FORWARD was performed (case of reduced RHS with Schur
4106C option during factorisation)
4107C ======================================================
4108 IF ( keep(221) .EQ. 1 ) THEN ! === Begin OF REDUCED RHS ======
4109C --------------------------------------
4110C Send (or copy) reduced RHS from PTR_RHS_ROOT located on
4111C MASTER_ROOT_IN_COMM to REDRHS located on MASTER (host node).
4112C (column by column if leading dimension LD_REDRHS
4113C of REDRHS is not equal to SIZE_ROOT)
4114C --------------------------------------
4115 IF ( ( id%MYID .EQ. master_root_in_comm ) .AND.
4116 & ( id%MYID .EQ. master ) ) THEN
4117C -- same proc --> copy
4118 ii = 0
4119 DO k=1, nbrhs_eff
4120 kdec = ibeg_redrhs+int(k-1,8)*int(ld_redrhs,8) - 1_8
4121 DO i = 1, size_root
4122 id%REDRHS(kdec+i) = ptr_rhs_root(ii+i)
4123 ENDDO
4124 ii = ii+size_root
4125 ENDDO
4126 ELSE
4127C -- recv in REDRHS
4128 IF ( id%MYID .EQ. master ) THEN
4129C -- recv from MASTER_ROOT_IN_COMM
4130 IF (ld_redrhs.EQ.size_root) THEN
4131C -- One message to receive
4132 kdec = ibeg_redrhs
4133 CALL mpi_recv(id%REDRHS(kdec),
4134 & size_root*nbrhs_eff,
4135 & mpi_complex,
4136 & master_root_in_comm, 0, id%COMM,
4137 & status,ierr)
4138 ELSE
4139C -- NBRHS_EFF receives
4140 DO k=1, nbrhs_eff
4141 kdec = ibeg_redrhs+int(k-1,8)*int(ld_redrhs,8)
4142 CALL mpi_recv(id%REDRHS(kdec),size_root,
4143 & mpi_complex,
4144 & master_root_in_comm, 0, id%COMM,
4145 & status,ierr)
4146 ENDDO
4147 ENDIF
4148 ELSE IF ( id%MYID .EQ. master_root_in_comm ) THEN
4149C -- send to MASTER
4150 ii = 1
4151 IF (ld_redrhs.EQ.size_root) THEN
4152C -- send all in on shot
4153 CALL mpi_send(ptr_rhs_root(ii),
4154 & size_root*nbrhs_eff,
4155 & mpi_complex,
4156 & master, 0, id%COMM,ierr)
4157 ELSE
4158 DO k=1, nbrhs_eff
4159 CALL mpi_send(ptr_rhs_root(ii),size_root,
4160 & mpi_complex,
4161 & master, 0, id%COMM,ierr)
4162 ii = ii + size_root
4163 ENDDO
4164 ENDIF
4165 ENDIF
4166C -- other procs are not concerned
4167 ENDIF
4168 ENDIF ! ====== END OF REDUCED RHS (Fwd only performed) ======
4169C =======================================================
4170C BACKWARD was PERFORMED
4171C Postprocess solution that is distributed
4172 IF ( keep(221) .NE. 1 ) THEN ! BACKWARD was PERFORMED
4173C -- KEEP(221).NE.1 => we are sure that backward has been performed
4174 IF (icntl21 == 0) THEN ! CENTRALIZED SOLUTION
4175C ========================================================
4176C GATHER SOLUTION computed during bwd
4177C Each proc holds the pieces of solution corresponding
4178C to all fully summed variables mapped on that processor
4179C (i.e. corresponding to master nodes mapped on that proc)
4180C In case of A-1 we gather directly in RHS_SPARSE
4181C the distributed solution.
4182C Scaling is done in all case on the fly of the reception
4183C Note that when only FORWARD has been performed
4184C RSH_MUMPS holds the solution computed during forward step
4185C (CMUMPS_SOL_R)
4186C there is no need to copy back in RSH_MUMPS the solution
4187C ========================================================
4188C centralized solution
4189 IF (keep(237).EQ.0) THEN
4190C CWORK not needed for AM1
4191 lcwork = max(max(keep(247),keep(246)),1)
4192 ALLOCATE( cwork(lcwork), stat=allocok )
4193 IF (allocok > 0) THEN
4194 info(1)=-13
4195 info(2)=max(max(keep(247),keep(246)),1)
4196 ENDIF
4197 ENDIF
4198 IF ( (id%MYID.EQ.master).AND. (keep(237).NE.0)
4199 & .AND. (id%NSLAVES.NE.1)) THEN
4200C Precompute map of indices in current column
4201C (no need to reset it between columns
4202 ALLOCATE (map_rhs(id%N), stat = allocok)
4203 IF ( allocok .GT. 0 ) THEN
4204 IF (lpok) THEN
4205 WRITE(lp,*) ' Problem allocation of MAP_RHS at solve'
4206 ENDIF
4207 info(1) = -13
4208 info(2) = id%N
4209 ELSE
4210 nb_bytes = nb_bytes + int(id%N,8) * k34_8
4211 nb_bytes_max = max(nb_bytes_max,nb_bytes)
4212 ENDIF
4213 ENDIF
4214C Propagate errors
4215 CALL mumps_propinfo( icntl(1), info(1),
4216 & id%COMM,id%MYID)
4217C Return in case of error.
4218 IF (info(1).LT.0) GO TO 90
4219 IF ((id%MYID.NE.master).OR. .NOT.lscal) THEN
4220 pt_scaling => dummy_scal
4221 ELSE
4222 IF (mtype.EQ.1) THEN
4223 pt_scaling => id%COLSCA
4224 ELSE
4225 pt_scaling => id%ROWSCA
4226 ENDIF
4227 ENDIF
4228 liw_passed = max( liw, 1 )
4229 timegather1=mpi_wtime()
4230 IF ( .NOT.i_am_slave ) THEN
4231C I did not participate to computing part of the solution
4232C (id%RHSCOMP not set/allocate) : receive solution, store
4233C it and scale it.
4234 IF (keep(237).EQ.0) THEN
4235C We need a workspace of minimal size KEEP(247)
4236C in order to unpack pieces of the solution.
4237 CALL cmumps_gather_solution(id%NSLAVES,id%N,
4238 & id%MYID, id%COMM, nbrhs_eff,
4239 & mtype, id%RHS(1), ld_rhs, id%NRHS, jbeg_rhs,
4240 & jdummy, id%KEEP(1), id%KEEP8(1),
4241 & id%PROCNODE_STEPS(1), idummy, 1,
4242 & id%STEP(1), bufr(1), lbufr, lbufr_bytes,
4243 & cwork(1), lcwork,
4244 & lscal, pt_scaling(1), size(pt_scaling),
4245 & c_dummy, 1 , 1, idummy, 1,
4246 & perm_rhs, size(perm_rhs) ! for sparse permuted RHS
4247 & )
4248 ELSE
4249C only gather target entries of A-1
4250 CALL cmumps_gather_solution_am1(id%NSLAVES,id%N,
4251 & id%MYID, id%COMM, nbrhs_eff,
4252 & c_dummy, 1, 1,
4253 & id%KEEP(1), bufr(1), lbufr, lbufr_bytes,
4254 & lscal, pt_scaling(1), size(pt_scaling)
4255C --- A-1 related entries
4256 & ,irhs_ptr_copy(1), size(irhs_ptr_copy),
4257 & irhs_sparse_copy(1), size(irhs_sparse_copy),
4258 & rhs_sparse_copy(1), size(rhs_sparse_copy),
4259 & uns_perm_inv, size(uns_perm_inv),
4260 & idummy, 1, 0
4261 & )
4262 ENDIF
4263 ELSE
4264C Avoid temporary copy (IS(1)) that some old
4265C compilers would do otherwise
4266 IF (keep(237).EQ.0) THEN
4267 IF (id%MYID.EQ.master) THEN
4268 ptr_rhs => id%RHS
4269 ncol_rhs_loc = id%NRHS
4270 ld_rhs_loc = ld_rhs
4271 jbeg_rhs_loc = jbeg_rhs
4272 ELSE
4273 ptr_rhs => cdummy_target
4274 ncol_rhs_loc = 1
4275 ld_rhs_loc = 1
4276 jbeg_rhs_loc = 1
4277 ENDIF
4278 CALL cmumps_gather_solution(id%NSLAVES,id%N,
4279 & id%MYID, id%COMM, nbrhs_eff, mtype,
4280 & ptr_rhs(1), ld_rhs_loc, ncol_rhs_loc, jbeg_rhs_loc,
4281 & id%PTLUST_S(1), id%KEEP(1), id%KEEP8(1),
4282 & id%PROCNODE_STEPS(1), is(1), liw_passed,
4283 & id%STEP(1), bufr(1), lbufr, lbufr_bytes,
4284 & cwork(1), lcwork,
4285 & lscal, pt_scaling(1), size(pt_scaling),
4286 & id%RHSCOMP(ibeg_rhscomp), ld_rhscomp, nbrhs_eff,
4287 & id%POSINRHSCOMP_COL(1), id%N,
4288 & perm_rhs, size(perm_rhs) ! For sparse permuted RHS
4289 & )
4290 ELSE ! only gather target entries of A-1
4291 CALL cmumps_gather_solution_am1(id%NSLAVES,id%N,
4292 & id%MYID, id%COMM, nbrhs_eff,
4293 & id%RHSCOMP(ibeg_rhscomp), ld_rhscomp, nbrhs_eff,
4294 & id%KEEP(1), bufr(1), lbufr, lbufr_bytes,
4295 & lscal, pt_scaling(1), size(pt_scaling)
4296C --- A-1 related entries
4297 & , irhs_ptr_copy(1), size(irhs_ptr_copy),
4298 & irhs_sparse_copy(1), size(irhs_sparse_copy),
4299 & rhs_sparse_copy(1), size(rhs_sparse_copy),
4300 & uns_perm_inv, size(uns_perm_inv),
4301 & id%POSINRHSCOMP_COL(1), id%N, nb_fs_rhscomp_tot
4302 & )
4303 ENDIF
4304 ENDIF
4305 timegather2=mpi_wtime()-timegather1+timegather2
4306 IF (keep(237).EQ.0) DEALLOCATE( cwork )
4307 IF ( (id%MYID.EQ.master).AND. (keep(237).NE.0)
4308 & ) THEN
4309C Copy back solution from RHS_SPARSE_COPY TO RHS_SPARSE
4310 DO j = jbeg_rhs, jbeg_rhs+nbcol_inbloc-1
4311 IF (do_permute_rhs.OR.interleave_par) THEN
4312 pj = perm_rhs(j)
4313 ELSE
4314 pj =j
4315 ENDIF
4316 colsize = id%IRHS_PTR(pj+1) -
4317 & id%IRHS_PTR(pj)
4318 IF (colsize.EQ.0) cycle
4319 jj = j-jbeg_rhs+1
4320C Precompute map of indices in current column
4321C (no need to reset it between columns
4322 IF (id%NSLAVES.NE.1) THEN
4323 DO ii=1, colsize
4324 map_rhs(id%IRHS_SPARSE(
4325 & id%IRHS_PTR(pj) + ii - 1)) = ii
4326 ENDDO
4327 DO iz2 = irhs_ptr_copy(jj),irhs_ptr_copy(jj+1)-1
4328 ii = irhs_sparse_copy(iz2)
4329 id%RHS_SPARSE(id%IRHS_PTR(pj)+map_rhs(ii)-1)=
4330 & rhs_sparse_copy(iz2)
4331 ENDDO
4332 ELSE
4333C Entries within a column are in order
4334C IZ - Column index in Sparse RHS
4335 DO iz= id%IRHS_PTR(pj), id%IRHS_PTR(pj+1)-1
4336 iz2 = irhs_ptr_copy(jj) +
4337 & iz - id%IRHS_PTR(pj)
4338 id%RHS_SPARSE(iz) = rhs_sparse_copy(iz2)
4339 ENDDO
4340 ENDIF
4341 ENDDO
4342 IF (id%NSLAVES.NE.1) THEN
4343 nb_bytes = nb_bytes - int(size(map_rhs),8) * k34_8
4344 DEALLOCATE ( map_rhs )
4345 ENDIF
4346 ENDIF ! end A-1 on master
4347C
4348C -- END of backward was performed with centralized solution
4349 ELSE ! (KEEP(221).NE.1) .AND.(ICNTL21.NE.0))
4350C
4351C BEGIN of backward performed with distributed solution
4352C time local copy + scaling
4353 timecopyscale1=mpi_wtime()
4354C The non working host should not do this:
4355 IF ( i_am_slave ) THEN
4356 liw_passed = max( liw, 1 )
4357C Only called if more than 1 pivot
4358C was eliminated by the processor.
4359C Note that LSOL_loc >= KEEP(89)
4360 IF ( keep(89) .GT. 0 ) THEN
4361 CALL cmumps_distributed_solution(id%NSLAVES,
4362 & id%N,id%MYID_NODES,
4363 & mtype, id%RHSCOMP(ibeg_rhscomp), ld_rhscomp,
4364 & nbrhs_eff, id%POSINRHSCOMP_COL(1),
4365 & id%ISOL_loc(1), id%SOL_loc(1), id%NRHS,
4366 & jbeg_rhs-nb_rhsskipped, id%LSOL_loc,
4367 & id%PTLUST_S(1), id%PROCNODE_STEPS(1),
4368 & id%KEEP(1),id%KEEP8(1),
4369 & is(1), liw_passed,
4370 & id%STEP(1), scaling_data_sol, lscal, nb_rhsskipped,
4371 & perm_rhs, size(perm_rhs) ) ! For permuted sparse RHS
4372 ENDIF
4373 ENDIF
4374 timecopyscale2=mpi_wtime()-timecopyscale1+timecopyscale2
4375 ENDIF
4376C === BACKWARD was PERFORMED WITH DISTRIBUTED SOLUTION ===
4377C ========================================================
4378 ENDIF ! ==== END of BACKWARD was PERFORMED (KEEP(221).NE.1)
4379C note that the main DO-loop on blocks is not ended yet
4380C
4381C ============================================
4382C BEGIN
4383C
4384C ITERATIVE REFINEMENT AND/OR ERROR ANALYSIS
4385C
4386C ============================================
4387 IF ( icntl10 > 0 .AND. nbrhs_eff > 1 ) THEN
4388C
4389C ----------------------------------
4390C Multiple RHS: apply a fixed number
4391C of iterative refinement steps
4392C ----------------------------------
4393C DO I = 1, ICNTL10
4394 write(6,*) ' Internal ERROR 15 in sol_driver '
4395C Compute residual: Y <- SAVERHS - A * RHS
4396C Solve RHS <- A^-1 Y, Y modified
4397C Assemble in RHS(REDUCE)
4398C RHS <- RHS + Y
4399C END DO
4400 END IF
4401 IF (postpros) THEN
4402C
4403C SAVERHS holds the original right hand side
4404C Sparse rhs are saved in SAVERHS as dense rhs
4405C
4406C * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
4407C
4408C Start iterative refinements. The master is managing the
4409C organisation of work, but slaves are used to solve systems of
4410C equations and, in case of distributed matrix, perform
4411C matrix-vector products. It is more complicated to do this with
4412C the SPMD version than it was with the master/slave approach.
4413C
4414C * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
4415c IF ( PROK .AND. ICNTL10 .NE. 0 ) WRITE( MP, 270 )
4416 IF ( prokg .AND. icntl10 .NE. 0 ) WRITE( mpg, 270 )
4417C Initializations and allocations
4418 nitref = abs(icntl10)
4419 ALLOCATE(r_y(id%N), stat = allocok)
4420 IF ( allocok .GT. 0 ) THEN
4421 info(1)=-13
4422 info(2)=id%N
4423 GOTO 777
4424 ENDIF
4425 nb_bytes = nb_bytes + int(id%N,8)*k16_8
4426 ALLOCATE(c_y(id%N), stat = allocok)
4427 IF ( allocok .GT. 0 ) THEN
4428 info(1)=-13
4429 info(2)=id%N
4430 GOTO 777
4431 ENDIF
4432 nb_bytes = nb_bytes + int(id%N,8)*k35_8
4433 IF ( id%MYID .EQ. master ) THEN
4434 ALLOCATE( iw1( 2 * id%N ),stat = allocok )
4435 IF ( allocok .GT. 0 ) THEN
4436 info(1)=-13
4437 info(2)=2 * id%N
4438 GOTO 777
4439 ENDIF
4440 nb_bytes = nb_bytes + int(2*id%N,8)*k34_8
4441 ALLOCATE( c_w(id%N), stat = allocok )
4442 IF ( allocok .GT. 0 ) THEN
4443 info(1)=-13
4444 info(2)=id%N
4445 GOTO 777
4446 ENDIF
4447 nb_bytes = nb_bytes + int(id%N,8)*k35_8
4448 ALLOCATE( r_w(2*id%N), stat = allocok )
4449 IF ( allocok .GT. 0 ) THEN
4450 info(1)=-13
4451 info(2)=id%N
4452 GOTO 777
4453 ENDIF
4454 nb_bytes = nb_bytes + int(2*id%N,8)*k16_8
4455 IF ( prokg .AND. icntl10 .GT. 0 )
4456 & WRITE( mpg, 240) 'MAXIMUM NUMBER OF STEPS =', nitref
4457C end allocations on Master
4458 END IF
4459 ALLOCATE(c_locwk54(id%N),stat = allocok)
4460 IF ( allocok .GT. 0 ) THEN
4461 info(1)=-13
4462 info(2)=id%N
4463 GOTO 777
4464 ENDIF
4465 nb_bytes = nb_bytes + int(id%N,8)*k35_8
4466 ALLOCATE(r_locwk54(id%N),stat = allocok)
4467 IF ( allocok .GT. 0 ) THEN
4468 info(1)=-13
4469 info(2)=id%N
4470 GOTO 777
4471 ENDIF
4472 nb_bytes = nb_bytes + int(id%N,8)*k16_8
4473 kase = 0
4474C Synchro point with broadcast of errors
4475 777 CONTINUE
4476 nb_bytes_max = max(nb_bytes_max,nb_bytes)
4477 CALL mumps_propinfo( icntl(1), info(1),
4478 & id%COMM,id%MYID)
4479 IF ( info(1) .LT. 0 ) GOTO 90
4480C TIMEEA needed if EA and IR with stopping criterium
4481C and IR with fixed n.of steps.
4482 timeea = 0.0e0
4483C TIMEEA1 needed if EA and IR with fixed n.of steps
4484 timeea1 = 0.0e0
4485 CALL mumps_secdeb(timeit)
4486C -------------------------
4487C
4488C RHSOL holds the initial guess for the solution
4489C We start the loop on the Iterative refinement procedure
4490C
4491C
4492C
4493C |- IRefin. L O O P -|
4494C V V
4495C
4496C =========================================================
4497C Computation of the infinity norm of A
4498C =========================================================
4499 IF ((icntl11.GT.0).OR.(icntl10.GT.0)) THEN
4500C We don't get through these lines if ICNTL10<=0 AND ICNTL11<=0
4501 IF ( keep(54) .eq. 0 ) THEN
4502C ------------------
4503C Centralized matrix
4504C ------------------
4505 IF ( id%MYID .eq. master ) THEN
4506C -----------------------------------------
4507C Call CMUMPS_SOL_X outside, if needed,
4508C in order to compute w(i,2)=sum|Aij|,j=1:n
4509C in vector R_W(id%N+i)
4510C -----------------------------------------
4511 IF (keep(55).NE.0) THEN
4512C unassembled matrix and norm of row required
4513 CALL cmumps_sol_x_elt(mtype, id%N,
4514 & id%NELT, id%ELTPTR(1),
4515 & id%LELTVAR, id%ELTVAR(1),
4516 & id%KEEP8(30), id%A_ELT(1),
4517 & r_w(id%N+1), keep(1),keep8(1) )
4518 ELSE
4519C assembled matrix
4520 IF ( mtype .eq. 1 ) THEN
4521 CALL cmumps_sol_x
4522 & ( id%A(1), id%KEEP8(28), id%N, id%IRN(1), id%JCN(1),
4523 & r_w(id%N+1), keep(1),keep8(1),
4524 & 0, id%SYM_PERM(1) )
4525 ELSE
4526 CALL cmumps_sol_x
4527 & ( id%A(1), id%KEEP8(28), id%N, id%JCN(1), id%IRN(1),
4528 & r_w(id%N+1), keep(1),keep8(1),
4529 & 0, id%SYM_PERM(1) )
4530 END IF
4531 ENDIF
4532 ENDIF
4533 ELSE
4534C ---------------------
4535C Matrix is distributed
4536C ---------------------
4537 IF ( i_am_slave .and.
4538 & id%KEEP8(29) .NE. 0_8 ) THEN
4539 IF ( mtype .eq. 1 ) THEN
4540 CALL cmumps_sol_x(id%A_loc(1),
4541 & id%KEEP8(29), id%N,
4542 & id%IRN_loc(1), id%JCN_loc(1),
4543 & r_locwk54, id%KEEP(1),id%KEEP8(1),
4544 & 0, id%SYM_PERM(1) )
4545 ELSE
4546 CALL cmumps_sol_x(id%A_loc(1),
4547 & id%KEEP8(29), id%N,
4548 & id%JCN_loc(1), id%IRN_loc(1),
4549 & r_locwk54, id%KEEP(1),id%KEEP8(1),
4550 & 0, id%SYM_PERM(1) )
4551 END IF
4552 ELSE
4553 r_locwk54 = rzero
4554 END IF
4555C -------------------------
4556C Assemble result on master
4557C -------------------------
4558 IF ( id%MYID .eq. master ) THEN
4559 CALL mpi_reduce( r_locwk54, r_w( id%N + 1 ),
4560 & id%N, mpi_real,
4561 & mpi_sum,master,id%COMM, ierr)
4562 ELSE
4563 CALL mpi_reduce( r_locwk54, r_dummy,
4564 & id%N, mpi_real,
4565 & mpi_sum,master,id%COMM, ierr)
4566 END IF
4567C End if KEEP(54)
4568 END IF
4569C
4570 IF ( id%MYID .eq. master ) THEN
4571C R_W is available on the master process only
4572 rinfog(4) = real(zero)
4573 DO i = 1, id%N
4574 rinfog(4) = max(r_w( id%N +i), rinfog(4))
4575 ENDDO
4576 ENDIF
4577C end ICNTL11 =/0 v ICNTL10>0
4578 ENDIF
4579C =========================================================
4580C END norm of A
4581C =========================================================
4582C Initializations for the IR
4583 noiter = 0
4584 iflag_ir = 0
4585 testconv = .false.
4586C Test of convergence should be made
4587 IF (( id%MYID .eq. master ).AND.(icntl10.GT.0)) THEN
4588 testconv = .true.
4589 arret = cntl(2)
4590 IF (arret .LT. 0.0e0) THEN
4591 arret = sqrt(epsilon(0.0e0))
4592 END IF
4593 ENDIF
4594C =========================================================
4595C Starting IR
4596 DO 22 irstep = 1, nitref +1
4597C =========================================================
4598C
4599C =========================================================
4600C Refine the solution starting from the second step of do loop
4601C =========================================================
4602 IF (( id%MYID .eq. master ).AND.(irstep.GT.1)) THEN
4603 noiter = noiter + 1
4604 DO i = 1, id%N
4605 id%RHS(ibeg+i-1) = id%RHS(ibeg+i-1) + c_y(i)
4606 ENDDO
4607 ENDIF
4608C ===========================================
4609C Computation of the RESIDUAL and of |A||x|
4610C ===========================================
4611 IF ( keep(54) .eq. 0 ) THEN
4612 IF ( id%MYID .eq. master ) THEN
4613 IF (keep(55).NE.0) THEN
4614C input matrix by element
4615 CALL cmumps_eltyd( mtype, id%N,
4616 & id%NELT, id%ELTPTR(1), id%LELTVAR,
4617 & id%ELTVAR(1), id%KEEP8(30), id%A_ELT(1),
4618 & saverhs, id%RHS(ibeg),
4619 & c_y, r_w, keep(50))
4620 ELSE
4621 IF ( mtype .eq. 1 ) THEN
4622 CALL cmumps_sol_y(id%A(1), id%KEEP8(28),
4623 & id%N, id%IRN(1),
4624 & id%JCN(1), saverhs,
4625 & id%RHS(ibeg), c_y, r_w, keep(1),keep8(1))
4626 ELSE
4627 CALL cmumps_sol_y(id%A(1), id%KEEP8(28),
4628 & id%N, id%JCN(1),
4629 & id%IRN(1), saverhs,
4630 & id%RHS(ibeg), c_y, r_w, keep(1),keep8(1))
4631 ENDIF
4632 ENDIF
4633 ENDIF
4634 ELSE
4635C ---------------------
4636C Matrix is distributed
4637C ---------------------
4638 CALL mpi_bcast( rhs_ir(ibeg), id%N,
4639 & mpi_complex, master,
4640 & id%COMM, ierr )
4641C --------------------------------------
4642C Compute Y = SAVERHS - A * RHS
4643C Y, SAVERHS defined only on master
4644C --------------------------------------
4645 IF ( i_am_slave .and.
4646 & id%KEEP8(29) .NE. 0_8 ) THEN
4647 CALL cmumps_loc_mv8( id%N, id%KEEP8(29),
4648 & id%IRN_loc(1), id%JCN_loc(1), id%A_loc(1),
4649 & rhs_ir(ibeg), c_locwk54, keep(50), mtype )
4650 ELSE
4651 c_locwk54 = zero
4652 END IF
4653 IF ( id%MYID .eq. master ) THEN
4654 CALL mpi_reduce( c_locwk54, c_y,
4655 & id%N, mpi_complex,
4656 & mpi_sum,master,id%COMM, ierr)
4657C ===========================
4658 c_y = saverhs - c_y
4659C ===========================
4660 ELSE
4661 CALL mpi_reduce( c_locwk54, c_dummy,
4662 & id%N, mpi_complex,
4663 & mpi_sum,master,id%COMM, ierr)
4664 END IF
4665C --------------------------------------
4666C Compute
4667C * If MTYPE = 1
4668C W(i) = Sum | Aij | | RHSj |
4669C j
4670C * If MTYPE = 0
4671C W(j) = Sum | Aij | | RHSi |
4672C i
4673C R_LOCWK54 used as local array for W
4674C RHS has been broadcasted
4675C --------------------------------------
4676 IF ( i_am_slave .and. id%KEEP8(29) .NE. 0_8 ) THEN
4677 CALL cmumps_loc_omega1( id%N, id%KEEP8(29),
4678 & id%IRN_loc(1), id%JCN_loc(1), id%A_loc(1),
4679 & rhs_ir(ibeg), r_locwk54, keep(50), mtype )
4680 ELSE
4681 r_locwk54 = rzero
4682 END IF
4683 IF ( id%MYID .eq. master ) THEN
4684 CALL mpi_reduce( r_locwk54, r_w,
4685 & id%N, mpi_real,
4686 & mpi_sum,master,id%COMM, ierr)
4687 ELSE
4688 CALL mpi_reduce( r_locwk54, r_dummy,
4689 & id%N, mpi_real,
4690 & mpi_sum, master, id%COMM, ierr)
4691 ENDIF
4692 ENDIF
4693C =====================================
4694C END computation RESIDUAL and |A||x|
4695C =====================================
4696 IF ( id%MYID .eq. master ) THEN
4697C
4698 IF ((icntl11.GT.0).OR.(icntl10.GT.0)) THEN
4699C --------------
4700C Error analysis and test of convergence,
4701C Compute the sparse componentwise backward error:
4702C - at each step if test of convergence of IR is
4703C requested (ICNTL(10)>0)
4704C - at step 1 and NITREF+1 if error analysis
4705C to be computed (ICNTL(11)>0) and if ICNTL(10)< 0
4706 IF (((icntl11.GT.0).OR.((icntl10.LT.0).AND.
4707 & ((irstep.EQ.1).OR.(irstep.EQ.nitref+1)))
4708 & .OR.((icntl10.EQ.0).AND.(irstep.EQ.1)))
4709 & .OR.(icntl10.GT.0)) THEN
4710C Compute w1 and w2
4711C always if ICNTL10>0 in the other case if ICNTL11>0
4712C -----------------
4713 IF (icntl10.LT.0) CALL mumps_secdeb(timeea1)
4714 CALL cmumps_sol_omega(id%N,saverhs,
4715 & id%RHS(ibeg), c_y, r_w, c_w, iw1, iflag_ir,
4716 & rinfog(7), noiter, testconv,
4717 & mp, arret, keep(361) )
4718 IF (icntl10.LT.0) THEN
4719 CALL mumps_secfin(timeea1)
4720 id%DKEEP(120)=id%DKEEP(120)+real(timeea1)
4721 ENDIF
4722 ENDIF
4723 IF ((icntl11.GT.0).AND.(
4724 & (icntl10.LT.0.AND.(irstep.EQ.1.OR.irstep.EQ.nitref+1))
4725 & .OR.((icntl10.GE.0).AND.(irstep.EQ.1))
4726 & )) THEN
4727C Error analysis before iterative refinement
4728C or for last if icntl10<0
4729C ------------------------------------------
4730 CALL mumps_secdeb(timeea)
4731 IF (icntl10.EQ.0) THEN
4732C No IR : there will be only the EA of the 1st sol.
4733 IF ( mpg .GT. 0 ) WRITE( mpg, 170 )
4734 ELSEIF (irstep.EQ.1) THEN
4735C IR : we print the EA of the 1st sol.
4736 IF ( mpg .GT. 0 ) WRITE( mpg, 55 )
4737 ELSEIF ((icntl10.LT.0).AND.(irstep.EQ.nitref+1)) THEN
4738C IR with fixed n. of steps: we print the EA
4739C of the last sol.
4740 IF ( mpg .GT. 0 ) THEN
4741 WRITE( mpg, 81 )
4742 WRITE( mpg, * )
4743 WRITE( mpg, 141 )
4744 & 'NUMBER OF STEPS OF ITERATIVE REFINEMENT REQUESTED =',
4745 & noiter
4746 ENDIF
4747 ENDIF
4748 givsol = .true.
4749 CALL cmumps_sol_q(mtype,info(1),id%N,
4750 & id%RHS(ibeg),
4751 & saverhs,r_w(id%N+1),c_y,givsol,
4752 & rinfog(4),rinfog(5),rinfog(6),mpg,icntl(1),
4753 & keep(1),keep8(1))
4754 IF ( mpg .GT. 0 ) THEN
4755C Error analysis before iterative refinement
4756 WRITE( mpg, 115 )
4757 & 'RINFOG(7):COMPONENTWISE SCALED RESIDUAL(W1)=',
4758 & rinfog(7)
4759 WRITE( mpg, 115 )
4760 & '------(8):---------------------------- (W2)=',
4761 & rinfog(8)
4762 END IF
4763 CALL mumps_secfin(timeea)
4764 id%DKEEP(120)=id%DKEEP(120)+real(timeea)
4765C end EA of the first solution
4766 END IF
4767 END IF
4768C --------------
4769 IF (irstep.EQ.nitref +1) THEN
4770C If we are at the NITREF+1 step , we have refined the
4771C solution NITREF times so we have to stop.
4772 kase = 0
4773C If we test the convergence (ICNTL10.GT.0) and
4774C IFLAG_IR = 0 we set a warning : more than NITREF steps
4775C needed
4776 IF ((icntl10.GT.0).AND.(iflag_ir.EQ.0))
4777 & id%INFO(1) = id%INFO(1) + 8
4778 ELSE
4779 IF (icntl10.GT.0) THEN
4780C -------------------
4781C Results of the test of convergence.
4782C IFLAG_IR = 0 we should try to improve the solution
4783C = 1 the stopping criterium is satisfied
4784C = 2 the method is diverging, we go back
4785C to the previous iterate
4786C = 3 the convergence is too slow
4787 IF (iflag_ir.GT.0) THEN
4788C If the convergence criterion is satisfied
4789C or the convergence too slow
4790C we set KASE=0 (end of the Iterative refinement)
4791 kase = 0
4792C If the convergence is not improved,
4793C we go back to the previous iterate.
4794C IFLAG_IR can be equal to 2 only if IRStep >= 2
4795 IF (iflag_ir.EQ.2) noiter = noiter - 1
4796 ELSE
4797C IFLAG_IR=0, try to improve the solution
4798 kase = 2
4799 ENDIF
4800 ELSEIF (icntl10.LT.0) THEN
4801C -------------------
4802 kase = 2
4803 ELSE
4804C ICNTL10 = 0, we want to perform only EA and not IR.
4805C -----------------
4806 kase = 0
4807 END IF
4808 ENDIF
4809C End Master
4810 ENDIF
4811C --------------
4812C Broadcast KASE
4813C --------------
4814 CALL mpi_bcast( kase, 1, mpi_integer, master,
4815 & id%COMM, ierr )
4816C If Kase= 0 we quit the IR process
4817 IF (kase.LE.0) GOTO 666
4818 IF (kase.LT.0) THEN
4819 WRITE(*,*) "Internal error 17 in CMUMPS_SOL_DRIVER"
4820 ENDIF
4821C =========================================================
4822C COMPUTE the solution of Ay = r
4823C =========================================================
4824C Call internal routine to avoid code duplication
4825 CALL cmumps_pp_solve()
4826 IF (info(1) .LT. 0) GOTO 90
4827C -----------------------
4828C Go back to beginning of
4829C loop to apply next step
4830C of iterative refinement
4831C -----------------------
4832 22 CONTINUE
4833 666 CONTINUE
4834C ************************************************
4835C
4836C End of the iterative refinement procedure
4837C
4838C ************************************************
4839 CALL mumps_secfin(timeit)
4840 IF ( id%MYID .EQ. master ) THEN
4841 IF ( nitref .GT. 0 ) THEN
4842 id%INFOG(15) = noiter
4843 END IF
4844C id%DKEEP(114) time for the iterative refinement
4845C id%DKEEP(120) time for the error analysis
4846C id%DKEEP(121) time for condition number
4847C these values are meaningful only on the host.
4848 IF (icntl10.EQ.0) THEN
4849C No IR has been requested. All the time is needed
4850C for computing EA
4851 id%DKEEP(120)=real(timeit)
4852 ELSE
4853C IR has been requested
4854 id%DKEEP(114)=real(timeit)-id%DKEEP(120)
4855 ENDIF
4856 END IF
4857 IF ( prokg ) THEN
4858 IF (icntl10.GT.0) THEN
4859 WRITE( mpg, 81 )
4860 WRITE( mpg, * )
4861 WRITE( mpg, 141 )
4862 & 'NUMBER OF STEPS OF ITERATIVE REFINEMENTS PERFORMED =',
4863 & noiter
4864 ENDIF
4865 ENDIF
4866C
4867C ==================================================
4868C BEGIN
4869C Perform error analysis after iterative refinement
4870C ==================================================
4871 IF ((icntl11 .GT. 0).AND.(icntl10.GT.0)) THEN
4872C If IR is requested with test of convergence,
4873C the EA of the last step of IR is done here,
4874C otherwise EA of the last step is done at the
4875C end of IR
4876 CALL mumps_secdeb(timeea)
4877 kase = 0
4878 IF (id%MYID .eq. master ) THEN
4879C Test if IFLAG_IR = 2, that is if the the IR was diverging,
4880C we went back to the previous iterate
4881C We have to do EA on the last computed solution.
4882 IF (iflag_ir.EQ.2) kase = 2
4883 ENDIF
4884C --------------
4885C Broadcast KASE
4886C --------------
4887 CALL mpi_bcast( kase, 1, mpi_integer, master,
4888 & id%COMM, ierr )
4889 IF (kase.EQ.2) THEN
4890C We went back to the previous iterate
4891C We have to do EA on the last computed solution.
4892C Compute the residual in C_Y using IRN, JCN, ASPK
4893C and the solution RHS(IBEG)
4894C The norm of the ith row in R_Y(I).
4895 IF ( keep(54) .eq. 0 ) THEN
4896C ---------------------
4897C Matrix is centralized
4898C ---------------------
4899 IF (id%MYID .EQ. master) THEN
4900 IF (keep(55).EQ.0) THEN
4901 CALL cmumps_qd2( mtype, id%N, id%KEEP8(28), id%A(1),
4902 & id%IRN(1), id%JCN(1),
4903 & id%RHS(ibeg), saverhs, r_y, c_y, keep(1),keep8(1))
4904 ELSE
4905 CALL cmumps_eltqd2( mtype, id%N,
4906 & id%NELT, id%ELTPTR(1),
4907 & id%LELTVAR, id%ELTVAR(1),
4908 & id%KEEP8(30), id%A_ELT(1),
4909 & id%RHS(ibeg), saverhs, r_y, c_y, keep(1),keep8(1))
4910 ENDIF
4911 ENDIF
4912 ELSE
4913C ---------------------
4914C Matrix is distributed
4915C ---------------------
4916 CALL mpi_bcast( rhs_ir(ibeg), id%N,
4917 & mpi_complex, master,
4918 & id%COMM, ierr )
4919C ----------------
4920C Compute residual
4921C ----------------
4922 IF ( i_am_slave .and.
4923 & id%KEEP8(29) .NE. 0_8 ) THEN
4924 CALL cmumps_loc_mv8( id%N, id%KEEP8(29),
4925 & id%IRN_loc(1), id%JCN_loc(1), id%A_loc(1),
4926 & rhs_ir(ibeg), c_locwk54, keep(50), mtype )
4927 ELSE
4928 c_locwk54 = zero
4929 END IF
4930 IF ( id%MYID .eq. master ) THEN
4931 CALL mpi_reduce( c_locwk54, c_y,
4932 & id%N, mpi_complex,
4933 & mpi_sum,master,id%COMM, ierr)
4934 c_y = saverhs - c_y
4935 ELSE
4936 CALL mpi_reduce( c_locwk54, c_dummy,
4937 & id%N, mpi_complex,
4938 & mpi_sum,master,id%COMM, ierr)
4939 END IF
4940 ENDIF
4941 ENDIF ! KASE.EQ.2
4942 IF (id%MYID .EQ. master) THEN
4943C Compute which equations are associated to w1 and which
4944C ones are associated to w2 in case of IFLAG_IR=2.
4945C If IFLAG_IR = 0 or 1 IW1 should be correct
4946 IF (iflag_ir.EQ.2) THEN
4947 testconv = .false.
4948 CALL cmumps_sol_omega(id%N,saverhs,
4949 & id%RHS(ibeg), c_y, r_w, c_w, iw1, iflag_ir,
4950 & rinfog(7), 0, testconv,
4951 & mp, arret, keep(361) )
4952 ENDIF ! (IFLAG_IR.EQ.2)
4953c Compute some statistics for
4954 givsol = .true.
4955 CALL cmumps_sol_q(mtype,info(1),id%N,
4956 & id%RHS(ibeg),
4957 & saverhs,r_w(id%N+1),c_y,givsol,
4958 & rinfog(4),rinfog(5),rinfog(6),mpg,icntl(1),
4959 & keep(1),keep8(1))
4960 ENDIF ! Master
4961 CALL mumps_secfin(timeea)
4962 id%DKEEP(120)=id%DKEEP(120)+real(timeea)
4963 ENDIF ! ICNTL11>0 and ICNTL10>0
4964C =========================================================
4965C Compute the Condition number associated if requested.
4966C =========================================================
4967 CALL mumps_secdeb(timelcond)
4968 IF (icntl11 .EQ. 1) THEN
4969 IF ( id%MYID .eq. master ) THEN
4970C Notice that D is always the identity
4971 ALLOCATE( d(id%N),stat =allocok )
4972 IF ( allocok .GT. 0 ) THEN
4973 info(1)=-13
4974 info(2)=id%N
4975 GOTO 777
4976 ENDIF
4977 nb_bytes = nb_bytes + int(id%N,8)*k16_8
4978 DO i = 1, id%N
4979 d( i ) = rone
4980 END DO
4981 ENDIF
4982 kase = 0
4983 222 CONTINUE
4984 IF ( id%MYID .EQ. master ) THEN
4985 CALL cmumps_sol_lcond(id%N, saverhs,
4986 & id%RHS(ibeg), c_y, d, r_w, c_w, iw1, kase,
4987 & rinfog(7), rinfog(9), rinfog(10),
4988 & mp, keep(1),keep8(1))
4989 ENDIF
4990C --------------
4991C Broadcast KASE
4992C --------------
4993 CALL mpi_bcast( kase, 1, mpi_integer, master,
4994 & id%COMM, ierr )
4995C KASE <= 0
4996C We reach the end of iterative method to compute
4997C LCOND1 and LCOND2
4998 IF (kase.LE.0) GOTO 224
4999 CALL cmumps_pp_solve()
5000 IF (info(1) .LT. 0) GOTO 90
5001C ---------------------------
5002C Go back to beginning of
5003C loop to apply next step
5004C of iterative method
5005C -----------------------
5006 GO TO 222
5007C End ICNTL11 = 1
5008 ENDIF
5009 224 CONTINUE
5010 CALL mumps_secfin(timelcond)
5011 id%DKEEP(121)=id%DKEEP(121)+real(timelcond)
5012 IF ((id%MYID .EQ. master).AND.(icntl11.GT.0)) THEN
5013 IF (icntl10.GT.0) THEN
5014C If ICNTL10<0 these stats have been printed before IR
5015 IF ( mpg .GT. 0 ) THEN
5016 WRITE( mpg, 115 )
5017 & 'RINFOG(7):COMPONENTWISE SCALED RESIDUAL(W1)=',
5018 & rinfog(7)
5019 WRITE( mpg, 115 )
5020 & '------(8):---------------------------- (W2)=',
5021 & rinfog(8)
5022 ENDIF
5023 END IF
5024 IF (icntl11.EQ.1) THEN
5025C If ICNTL11/=1 these stats haven't been computed
5026 IF (mpg.GT.0) THEN
5027 WRITE( mpg, 115 )
5028 & '------(9):Upper bound ERROR ...............=',
5029 & rinfog(9)
5030 WRITE( mpg, 115 )
5031 & '-----(10):CONDITION NUMBER (1) ............=',
5032 & rinfog(10)
5033 WRITE( mpg, 115 )
5034 & '-----(11):CONDITION NUMBER (2) ............=',
5035 & rinfog(11)
5036 END IF
5037 END IF
5038 END IF ! MASTER && ICNTL11.GT.0
5039 IF ( prokg .AND. abs(icntl10) .GT.0 ) WRITE( mpg, 131 )
5040C===================================================
5041C Perform error analysis after iterative refinements
5042C END
5043C===================================================
5044C
5045 IF (id%MYID == master) THEN
5046 nb_bytes = nb_bytes - int(size(c_w),8)*k35_8
5047 DEALLOCATE(c_w)
5048 nb_bytes = nb_bytes - int(size(r_w),8)*k16_8
5049 & - int(size(iw1),8)*k34_8
5050 DEALLOCATE(r_w)
5051 DEALLOCATE(iw1)
5052 IF (icntl11 .EQ. 1) THEN
5053C We have used D only for LCOND1,2
5054 nb_bytes = nb_bytes - int(size(d ),8)*k16_8
5055 DEALLOCATE(d)
5056 ENDIF
5057 ENDIF
5058 nb_bytes = nb_bytes -
5059 & (int(size(r_y),8)+int(size(r_locwk54),8))*k16_8
5060 nb_bytes = nb_bytes -
5061 & (int(size(c_y),8)+int(size(c_locwk54),8))*k35_8
5062 DEALLOCATE(r_y)
5063 DEALLOCATE(c_y)
5064 DEALLOCATE(r_locwk54)
5065 DEALLOCATE(c_locwk54)
5066C End POSTPros
5067 END IF
5068C============================================
5069C
5070C ITERATIVE REFINEMENT AND/OR ERROR ANALYSIS
5071C
5072C END
5073C
5074C============================================
5075C ==========================
5076C Begin reordering on master
5077C corresponding to maximum transversal permutation
5078C in case of centralized solution
5079C (ICNTL21==0)
5080C
5081 IF ( id%MYID .EQ. master .AND. icntl21==0
5082 & .AND. keep(23) .NE. 0.AND.keep(237).EQ.0) THEN
5083C ((No transpose and backward performed and NO A-1)
5084C or null space computation): permutation
5085C must be done on solution.
5086 IF ((keep(221).NE.1 .AND. mtype .EQ. 1)
5087 & .OR. keep(111) .NE.0 .OR. keep(252).NE.0 ) THEN
5088C Permute the solution RHS according to the column
5089C permutation held in UNS_PERM
5090C Column J of the permuted matrix corresponds to
5091C column UNS_PERM(J) of the original matrix.
5092C RHS holds the permuted solution
5093C Note that id%N>1 since KEEP(23)=0 when id%N=1
5094C
5095 ALLOCATE( c_rw1( id%N ),stat =allocok )
5096! temporary not in NB_BYTES
5097 IF ( allocok .GT. 0 ) THEN
5098 info(1)=-13
5099 info(2)=id%N
5100 WRITE(*,*) 'could not allocate ', id%N, 'integers.'
5101 CALL mumps_abort()
5102 END IF
5103 DO k = 1, nbrhs_eff
5104 IF (keep(242).EQ.0) THEN
5105 kdec = (k-1)*ld_rhs+ibeg-1
5106 ELSE
5107C -------------------------------
5108C Columns just computed might not
5109C be contiguous in original RHS
5110C -------------------------------
5111 kdec = int(perm_rhs(k-1+jbeg_rhs)-1,8)*int(ld_rhs,8)
5112 ENDIF
5113 DO i = 1, id%N
5114 c_rw1(i) = id%RHS(kdec+i)
5115 ENDDO
5116 DO i = 1, id%N
5117 jperm = id%UNS_PERM(i)
5118 id%RHS( kdec+jperm ) = c_rw1( i )
5119 ENDDO
5120 ENDDO
5121 DEALLOCATE( c_rw1 ) !temporary not in NB_BYTES
5122 END IF
5123 END IF
5124C
5125C End reordering on master
5126C ========================
5127 IF (id%MYID.EQ.master .and.icntl21==0.and.keep(221).NE.1.AND.
5128 & (keep(237).EQ.0) ) THEN
5129* print out the solution
5130 IF ( info(1) .GE. 0 .AND. icntl(4).GE.3 .AND. icntl(3).GT.0)
5131 & THEN
5132 k = min0(10, id%N)
5133 IF (icntl(4) .eq. 4 ) k = id%N
5134 j = min0(10,nbrhs_eff)
5135 IF (icntl(4) .eq. 4 ) j = nbrhs_eff
5136 DO ii=1, j
5137 WRITE(icntl(3),110) beg_rhs+ii-1
5138 WRITE(icntl(3),160)
5139 & (id%RHS(ibeg+(ii-1)*ld_rhs+i-1),i=1,k)
5140 ENDDO
5141 END IF
5142 END IF
5143C ==========================
5144C blocking for multiple RHS (END OF DO WHILE (BEG_RHS.LE.NBRHS)
5145 IF ((keep(248).EQ.1).AND.(keep(237).EQ.0)) THEN
5146 ! case of general sparse: in case of empty columns
5147 ! NBRHS_EFF might has been updated and broadcasted
5148 ! and holds the effective size of a contiguous block of
5149 ! non empty columns
5150 beg_rhs = beg_rhs + nbrhs_eff ! nb of nonempty columns
5151 ELSE
5152 beg_rhs = beg_rhs + nbrhs
5153 ENDIF
5154C }
5155 ENDDO
5156C END DO WHILE (BEG_RHS.LE.id%NRHS)
5157C =================================
5158 IF (keep(400) .GT. 0) THEN
5159 CALL cmumps_sol_l0omp_ld(keep(400))
5160 ENDIF
5161C
5162C ========================================================
5163C Reset RHS to zero for all remaining columns that
5164C have not been processed because they were emtpy
5165C ========================================================
5166 IF ( (id%MYID.EQ.master)
5167 & .AND. ( keep(248).NE.0 ) ! sparse RHS on input
5168 & .AND. ( keep(237).EQ.0 ) ! No A-1
5169 & .AND. ( icntl21.EQ.0 ) ! Centralized solution
5170 & .AND. ( keep(221) .NE.1 ) ! Not Reduced RHS step of Schur
5171 & .AND. ( jend_rhs .LT. id%NRHS )
5172 & )
5173 & THEN
5174 jbeg_new = jend_rhs + 1
5175 IF (do_permute_rhs.OR.interleave_par) THEN
5176 DO WHILE ( jbeg_new.LE. id%NRHS)
5177 DO i=1, id%N
5178 id%RHS(int(perm_rhs(jbeg_new) -1,8)*int(ld_rhs,8)+i)
5179 & = zero
5180 ENDDO
5181 jbeg_new = jbeg_new +1
5182 ENDDO
5183 ELSE
5184 DO WHILE ( jbeg_new.LE. id%NRHS)
5185 DO i=1, id%N
5186 id%RHS(int(jbeg_new -1,8)*int(ld_rhs,8) + i) = zero
5187 ENDDO
5188 jbeg_new = jbeg_new +1
5189 ENDDO
5190 ENDIF ! End DO_PERMUTE_RHS.OR.INTERLEAVE_PAR
5191 ENDIF
5192C ========================================================
5193C Reset id%SOL_loc to zero for all remaining columns that
5194C have not been processed because they were emtpy
5195C ========================================================
5196 IF ( i_am_slave .AND. (icntl21.NE.0) .AND.
5197 & ( jend_rhs .LT. id%NRHS ) .AND. keep(221).NE.1 ) THEN
5198 jbeg_new = jend_rhs + 1
5199 IF (do_permute_rhs.OR.interleave_par) THEN
5200 DO WHILE ( jbeg_new.LE. id%NRHS)
5201 DO i=1, keep(89)
5202 id%SOL_loc(int(perm_rhs(jbeg_new) -1,8)*
5203 & int(id%LSOL_loc,8)+int(i,8)) = zero
5204 ENDDO
5205 jbeg_new = jbeg_new +1
5206 ENDDO
5207 ELSE
5208C
5209 DO WHILE ( jbeg_new.LE. id%NRHS)
5210 DO i=1, keep(89)
5211 id%SOL_loc((jbeg_new -1)*id%LSOL_loc + i) = zero
5212 ENDDO
5213 jbeg_new = jbeg_new +1
5214 ENDDO
5215 ENDIF
5216 ENDIF
5217C
5218C ================================================================
5219C Reset id%RHSCOMP and id%REDRHS to zero for all remaining columns
5220C that have not been processed because they were emtpy
5221C ================================================================
5222 IF ((keep(221).EQ.1) .AND.
5223 & ( jend_rhs .LT. id%NRHS ) ) THEN
5224 IF (id%MYID .EQ. master) THEN
5225 jbeg_new = jend_rhs + 1
5226 DO WHILE ( jbeg_new.LE. id%NRHS)
5227 DO i=1, id%SIZE_SCHUR
5228 id%REDRHS(int(jbeg_new -1,8)*int(ld_redrhs,8) +
5229 & int(i,8)) = zero
5230 ENDDO
5231 jbeg_new = jbeg_new +1
5232 ENDDO
5233 ENDIF
5234 IF (i_am_slave) THEN
5235 jbeg_new = jend_rhs + 1
5236 DO WHILE ( jbeg_new.LE. id%NRHS)
5237 DO i=1,nbent_rhscomp
5238 id%RHSCOMP(int(jbeg_new -1,8)*int(ld_rhscomp,8) +
5239 & int(i,8)) = zero
5240 ENDDO
5241 jbeg_new = jbeg_new +1
5242 ENDDO
5243 ENDIF
5244 ENDIF
5245C
5246C
5247C ! maximum size used on that proc
5248 id%INFO(26) = int(nb_bytes_max / 1000000_8)
5249C Centralize memory statistics on the host
5250C
5251C INFOG(30) = size of mem in bytes for solve
5252C for the processor using largest memory
5253C INFOG(31) = size of mem in bytes for solve
5254C sum over all processors
5255C ----------------------------------------------------
5256 CALL mumps_mem_centralize( id%MYID, id%COMM,
5257 & id%INFO(26), id%INFOG(30), irank )
5258 IF ( prokg ) THEN
5259 IF (print_maxavg) THEN
5260 WRITE( mpg,'(A,I10) ')
5261 & ' ** Rank of processor needing largest memory in solve :',
5262 & irank
5263 WRITE( mpg,'(A,I10) ')
5264 & ' ** Space in MBYTES used by this processor for solve :',
5265 & id%INFOG(30)
5266 IF ( keep(46) .eq. 0 ) THEN
5267 WRITE( mpg,'(A,I10) ')
5268 & ' ** Avg. Space in MBYTES per working proc during solve :',
5269 & ( id%INFOG(31)-id%INFO(26) ) / id%NSLAVES
5270 ELSE
5271 WRITE( mpg,'(A,I10) ')
5272 & ' ** Avg. Space in MBYTES per working proc during solve :',
5273 & id%INFOG(31) / id%NSLAVES
5274 END IF
5275 ELSE
5276 WRITE( mpg,'(A,I10) ')
5277 & ' ** Space in MBYTES used for solve :',
5278 & id%INFOG(30)
5279 ENDIF
5280 END IF
5281*===============================
5282*End of Solve Phase
5283*===============================
5284C Store and print timings
5285 CALL mumps_secfin(time3)
5286 id%DKEEP(112)=real(time3)
5287 id%DKEEP(113)=real(timec2)
5288 id%DKEEP(115)=real(timescatter2)
5289 id%DKEEP(116)=real(timegather2)
5290 id%DKEEP(122)=real(timecopyscale2)
5291C Reductions of DKEEP(115,116,117,118,119,122):
5292 CALL mpi_reduce( id%DKEEP(115), id%DKEEP(160),1,
5293 &mpi_real, mpi_max, master, id%COMM, ierr )
5294 CALL mpi_reduce( id%DKEEP(116), id%DKEEP(161),1,
5295 &mpi_real, mpi_max, master, id%COMM, ierr )
5296 CALL mpi_reduce( id%DKEEP(117), id%DKEEP(162),1,
5297 &mpi_real, mpi_max, master, id%COMM, ierr )
5298 CALL mpi_reduce( id%DKEEP(118), id%DKEEP(163),1,
5299 &mpi_real, mpi_max, master, id%COMM, ierr )
5300 CALL mpi_reduce( id%DKEEP(119), id%DKEEP(164),1,
5301 &mpi_real, mpi_max, master, id%COMM, ierr )
5302 CALL mpi_reduce( id%DKEEP(122), id%DKEEP(165),1,
5303 &mpi_real, mpi_max, master, id%COMM, ierr )
5304C
5305 IF (prokg) THEN
5306 WRITE ( mpg, *)
5307 WRITE ( mpg, *) "Leaving solve with ..."
5308 WRITE( mpg, 434 ) id%DKEEP(160) ! max id%DKEEP(115)
5309 WRITE( mpg, 432 ) id%DKEEP(113) ! ok without reduction
5310 WRITE( mpg, 435 ) id%DKEEP(162) ! max id%DKEEP(117)
5311 IF ((keep(38).NE.0).OR.(keep(20).NE.0))
5312 & WRITE( mpg, 437 ) id%DKEEP(164) ! id%DKEEP(119)
5313 WRITE( mpg, 436 ) id%DKEEP(163) ! id%DKEEP(118)
5314 WRITE( mpg, 433 ) id%DKEEP(161) ! max(DKEEP(116)) -- Gather
5315 WRITE( mpg, 431 ) id%DKEEP(165) ! max(DKEEP(122)) -- Dist. sol.
5316 ENDIF
5317 IF ( prok ) THEN
5318 WRITE ( mp, *)
5319 WRITE ( mp, *) "Local statistics"
5320 WRITE( mp, 434 ) id%DKEEP(115)
5321 WRITE( mp, 432 ) id%DKEEP(113)
5322 WRITE( mp, 435 ) id%DKEEP(117)
5323 IF ((keep(38).NE.0).OR.(keep(20).NE.0))
5324 & WRITE( mp, 437 ) id%DKEEP(119)
5325 WRITE( mp, 436 ) id%DKEEP(118)
5326 WRITE( mp, 433 ) id%DKEEP(116)
5327 WRITE( mp, 431 ) id%DKEEP(122)
5328 END IF
5329 90 CONTINUE
5330 IF (info(1) .LT.0 ) THEN
5331 ENDIF
5332 IF (keep(485) .EQ. 1) THEN
5333 keep(350) = keep350_save
5334 IF (is_lr_mod_to_struc_done) THEN
5335 CALL cmumps_blr_mod_to_struc(id%BLRARRAY_ENCODING)
5336 CALL mumps_fdm_mod_to_struc('F',id%FDM_F_ENCODING,
5337 & id%INFO(1))
5338 ENDIF
5339 ENDIF
5340 IF (keep(201).GT.0)THEN
5341 IF (is_init_ooc_done) THEN
5342 CALL cmumps_ooc_end_solve(ierr)
5343 IF (ierr.LT.0 .AND. info(1) .GE. 0) info(1) = ierr
5344 ENDIF
5345 CALL mumps_propinfo( icntl(1), info(1),
5346 & id%COMM,id%MYID)
5347 ENDIF
5348C ------------------------
5349C Check allocation before
5350C to deallocate (cases of
5351C errors that could happen
5352C before or after allocate
5353C statement)
5354C
5355C Sparse RHS
5356C Free space and reset pointers if needed
5357 IF (irhs_sparse_copy_allocated) THEN
5358 nb_bytes = nb_bytes -
5359 & int(size(irhs_sparse_copy),8)*k34_8
5360 DEALLOCATE(irhs_sparse_copy)
5361 irhs_sparse_copy_allocated=.false.
5362 NULLIFY(irhs_sparse_copy)
5363 ENDIF
5364 IF (irhs_ptr_copy_allocated) THEN
5365 nb_bytes = nb_bytes -
5366 & int(size(irhs_ptr_copy),8)*k34_8
5367 DEALLOCATE(irhs_ptr_copy)
5368 irhs_ptr_copy_allocated=.false.
5369 NULLIFY(irhs_ptr_copy)
5370 ENDIF
5371 IF (rhs_sparse_copy_allocated) THEN
5372 nb_bytes = nb_bytes -
5373 & int(size(rhs_sparse_copy),8)*k35_8
5374 DEALLOCATE(rhs_sparse_copy)
5375 rhs_sparse_copy_allocated=.false.
5376 NULLIFY(rhs_sparse_copy)
5377 ENDIF
5378 IF (allocated(map_rhs_loc)) THEN
5379 nb_bytes = nb_bytes - int(size(map_rhs_loc),8)*k34_8
5380 DEALLOCATE(map_rhs_loc)
5381 ENDIF
5382 IF (irhs_loc_ptr_allocated ) THEN
5383 nb_bytes = nb_bytes - int(size(irhs_loc_ptr),8)*k34_8
5384 DEALLOCATE(irhs_loc_ptr)
5385 NULLIFY(irhs_loc_ptr)
5386 irhs_loc_ptr_allocated = .false.
5387 ENDIF
5388 IF (i_am_slave.AND.lscal.AND.keep(248).EQ.-1) THEN
5389 nb_bytes = nb_bytes -
5390 & int(size(scaling_data_dr%SCALING_LOC),8)*k16_8
5391 DEALLOCATE(scaling_data_dr%SCALING_LOC)
5392 NULLIFY (scaling_data_dr%SCALING_LOC)
5393 ENDIF
5394 IF (allocated(perm_rhs)) THEN
5395 nb_bytes = nb_bytes - int(size(perm_rhs),8)*k34_8
5396 DEALLOCATE(perm_rhs)
5397 ENDIF
5398C END A-1
5399 IF (allocated(uns_perm_inv)) THEN
5400 nb_bytes = nb_bytes - int(size(uns_perm_inv),8)*k34_8
5401 DEALLOCATE(uns_perm_inv)
5402 ENDIF
5403 IF (allocated(bufr)) THEN
5404 nb_bytes = nb_bytes - int(size(bufr),8)*k34_8
5405 DEALLOCATE(bufr)
5406 ENDIF
5407 IF ( i_am_slave ) THEN
5408 IF (allocated(rhs_bounds)) THEN
5409 nb_bytes = nb_bytes -
5410 & int(size(rhs_bounds),8)*k34_8
5411 DEALLOCATE(rhs_bounds)
5412 ENDIF
5413 IF (allocated(iwk_solve)) THEN
5414 nb_bytes = nb_bytes - int(size(iwk_solve),8)*k34_8
5415 DEALLOCATE( iwk_solve )
5416 ENDIF
5417 IF (allocated(ptracb)) THEN
5418 nb_bytes = nb_bytes - int(size(ptracb),8)*k34_8*
5419 & int(keep(10),8)
5420 DEALLOCATE( ptracb )
5421 ENDIF
5422 IF (allocated(iwcb)) THEN
5423 nb_bytes = nb_bytes - int(size(iwcb),8)*k34_8
5424 DEALLOCATE( iwcb )
5425 ENDIF
5426C ------------------------
5427C SLAVE CODE
5428C -----------------------
5429C Deallocate send buffers
5430C -----------------------
5431 IF (id%NSLAVES .GT. 1) THEN
5432 CALL cmumps_buf_deall_cb( ierr )
5433 CALL cmumps_buf_deall_small_buf( ierr )
5434 ENDIF
5435 END IF
5436C
5437 IF ( id%MYID .eq. master ) THEN
5438C ------------------------
5439C SAVERHS may have been
5440C allocated only on master
5441C ------------------------
5442 IF (allocated(saverhs)) THEN
5443 nb_bytes = nb_bytes - int(size(saverhs),8)*k35_8
5444 DEALLOCATE( saverhs)
5445 ENDIF
5446C Nullify RHS_IR might have been pointing to id%RHS
5447 NULLIFY(rhs_ir)
5448 ELSE
5449C --------------------
5450C Free right-hand-side
5451C on slave processors
5452C --------------------
5453 IF (associated(rhs_ir)) THEN
5454 nb_bytes = nb_bytes - int(size(rhs_ir),8)*k35_8
5455 DEALLOCATE(rhs_ir)
5456 NULLIFY(rhs_ir)
5457 END IF
5458 END IF
5459 IF (i_am_slave) THEN
5460C Deallocate temporary workspace SRW3
5461 IF (allocated(srw3)) THEN
5462 nb_bytes = nb_bytes - int(size(srw3),8)*k35_8
5463 DEALLOCATE(srw3)
5464 ENDIF
5465 IF (lscal .AND. icntl21==1) THEN
5466C Free local scaling arrays
5467 nb_bytes = nb_bytes -
5468 & int(size(scaling_data_sol%SCALING_LOC),8)*k16_8
5469 DEALLOCATE(scaling_data_sol%SCALING_LOC)
5470 NULLIFY(scaling_data_sol%SCALING_LOC)
5471 ENDIF
5472C Free memory until next call to CMUMPS
5473 IF (wk_user_provided) THEN
5474C S points to WK_USER provided by user
5475C KEEP8(24) holds size of WK_USER
5476C it should be saved and is used
5477C in incore to check that size provided is consistent
5478C (see error -41)
5479 NULLIFY(id%S)
5480 ELSE IF (associated(id%S).AND.keep(201).GT.0) THEN
5481C OOC: free space for S that was allocated
5482 nb_bytes = nb_bytes - keep8(23)*k35_8
5483 id%KEEP8(23)=0_8
5484 DEALLOCATE(id%S)
5485 NULLIFY(id%S)
5486 ENDIF
5487 IF (keep(221).NE.1) THEN
5488C -- After reduction of RHS to Schur variables
5489C -- keep compressed RHS generated during FWD step
5490C -- to be used for future expansion
5491 IF (associated(id%RHSCOMP)) THEN
5492 nb_bytes = nb_bytes - id%KEEP8(25)*k35_8
5493 DEALLOCATE(id%RHSCOMP)
5494 NULLIFY(id%RHSCOMP)
5495 id%KEEP8(25)=0_8
5496 ENDIF
5497 IF (associated(id%POSINRHSCOMP_ROW)) THEN
5498 nb_bytes = nb_bytes -
5499 & int(size(id%POSINRHSCOMP_ROW),8)*k34_8
5500 DEALLOCATE(id%POSINRHSCOMP_ROW)
5501 NULLIFY(id%POSINRHSCOMP_ROW)
5502 ENDIF
5503 IF (id%POSINRHSCOMP_COL_ALLOC) THEN
5504 nb_bytes = nb_bytes -
5505 & int(size(id%POSINRHSCOMP_COL),8)*k34_8
5506 DEALLOCATE(id%POSINRHSCOMP_COL)
5507 NULLIFY(id%POSINRHSCOMP_COL)
5508 id%POSINRHSCOMP_COL_ALLOC = .false.
5509 ENDIF
5510 ENDIF
5511 IF ( work_wcb_allocated ) THEN
5512 nb_bytes = nb_bytes - int(size(work_wcb),8)*k35_8
5513 DEALLOCATE( work_wcb )
5514 ENDIF
5515C Otherwise, WORK_WCB may point to some
5516C position inside id%S, nullify it
5517 NULLIFY( work_wcb )
5518 ENDIF
5519 RETURN
5520 55 FORMAT (//' ERROR ANALYSIS BEFORE ITERATIVE REFINEMENT')
5521 100 FORMAT(//' ****** SOLVE & CHECK STEP ********'/)
5522 110 FORMAT (//' Vector solution for column ',i12)
5523 115 FORMAT(1x, a44,1p,d9.2)
5524 434 FORMAT(' Time to build/scatter RHS =',f15.6)
5525 432 FORMAT(' Time in solution step (fwd/bwd) =',f15.6)
5526 435 FORMAT(' .. Time in forward (fwd) step = ',f15.6)
5527 437 FORMAT(' .. Time in ScaLAPACK root = ',f15.6)
5528 436 FORMAT(' .. Time in backward (bwd) step = ',f15.6)
5529 433 FORMAT(' Time to gather solution(cent.sol)=',f15.6)
5530 431 FORMAT(' Time to copy/scale dist. solution=',f15.6)
5531 150 FORMAT(' GLOBAL STATISTICS PRIOR SOLVE PHASE ...........'/
5532 & ' Number of right-hand-sides =',i12/
5533 & ' Blocking factor for multiple rhs =',i12/
5534 & ' ICNTL (9) =',i12/
5535 & ' --- (10) =',i12/
5536 & ' --- (11) =',i12/
5537 & ' --- (20) =',i12/
5538 & ' --- (21) =',i12/
5539 & ' --- (30) =',i12/
5540 & ' --- (35) =',i12
5541 & )
5542 151 FORMAT (' --- (25) =',i12)
5543 152 FORMAT (' --- (26) =',i12)
5544 153 FORMAT (' --- (32) =',i12)
5545 160 FORMAT (' RHS'/(1x,1p,5e14.6))
5546 170 FORMAT (/' ERROR ANALYSIS' )
5547 240 FORMAT (1x, a42,i4)
5548 270 FORMAT (//' BEGIN ITERATIVE REFINEMENT' )
5549 81 FORMAT (/' STATISTICS AFTER ITERATIVE REFINEMENT ')
5550 131 FORMAT (/' END ITERATIVE REFINEMENT ')
5551 141 FORMAT(1x, a52,i4)
5552 CONTAINS
5553 SUBROUTINE cmumps_check_distrhs(
5554 & idNloc_RHS,
5555 & idLRHS_loc,
5556 & NRHS,
5557 & idIRHS_loc,
5558 & idRHS_loc,
5559 & INFO)
5560C
5561C Purpose:
5562C =======
5563C
5564C Check distributed RHS format. We assume that
5565C the user has indicated that he/she provided
5566C a distributed RHS (KEEP(248)=-1). We also
5567C assume that the nb of RHS columns NRHS has
5568C been broadcasted to all processes. This
5569C routine should then be called on the workers.
5570C
5571C Arguments:
5572C =========
5573C
5574 INTEGER, INTENT( IN ) :: idNloc_RHS
5575 INTEGER, INTENT( IN ) :: idLRHS_loc
5576 INTEGER, INTENT( IN ) :: NRHS
5577#if defined(MUMPS_F2003)
5578 INTEGER, INTENT( IN ), POINTER :: idIRHS_loc (:)
5579 COMPLEX, INTENT( IN ), POINTER :: idRHS_loc (:)
5580#else
5581 INTEGER, POINTER :: idIRHS_loc (:)
5582 COMPLEX, POINTER :: idRHS_loc (:)
5583#endif
5584 INTEGER, INTENT( INOUT ) :: INFO(80)
5585C
5586C Local declarations:
5587C ==================
5588C
5589 INTEGER(8) :: REQSIZE8
5590C
5591C Executable statements:
5592C =====================
5593C
5594C Quick return if nothing on this proc
5595 IF (idnloc_rhs .LE. 0) RETURN
5596C Check for leading dimension
5597 IF (nrhs.NE.1) THEN
5598 IF ( idlrhs_loc .LT. idnloc_rhs) THEN
5599 info(1)=-55
5600 info(2)=idlrhs_loc
5601 RETURN
5602 ENDIF
5603 ENDIF
5604 IF (idnloc_rhs .GT. 0) THEN
5605C Check association and size of index array idIRHS_loc
5606 IF (.NOT. associated(idirhs_loc)) THEN
5607 id%INFO(1)=-22
5608 id%INFO(2)=17
5609 RETURN
5610 ELSE IF (size(idirhs_loc) .LT. idnloc_rhs) THEN
5611 info(1)=-22
5612 info(2)= 17
5613 RETURN
5614 ENDIF
5615C Check association and size of value array idRHS_loc
5616 IF (.NOT. associated(idrhs_loc)) THEN
5617 id%INFO(1)=-22
5618 id%INFO(2)=18
5619 RETURN
5620 ELSE
5621C Check size of array of values idRHS_loc
5622 reqsize8 = int(idlrhs_loc,8)*int(nrhs,8)
5623 & + int(-idlrhs_loc+idnloc_rhs,8)
5624#if defined(MUMPS_F2003)
5625 IF (size(idrhs_loc,kind=8) .LT. reqsize8) THEN
5626#else
5627 IF ( reqsize8 .LE. int(huge(idnloc_rhs),8) .AND.
5628 & size(idrhs_loc) .LT. int(reqsize8) ) THEN
5629C (Warning: this assumes that size(idRHS_loc)
5630C does not overflow)
5631#endif
5632 info(1)=-22
5633 info(2)=18
5634 RETURN
5635 ENDIF
5636 ENDIF
5637 ENDIF
5638 RETURN
5639 END SUBROUTINE cmumps_check_distrhs
5640 SUBROUTINE cmumps_pp_solve()
5641 IMPLICIT NONE
5642C
5643C Purpose:
5644C =======
5645C Scatter right-hand side, solve the system,
5646C and gather the solution on the host during
5647C post-processing.
5648C We use an internal subroutine to avoid code
5649C duplication without the complication of adding
5650C new parameters or local variables. All variables
5651C in this routine have the scope of CMUMPS_SOL_DRIVER.
5652C
5653C
5654 IF (kase .NE. 1 .AND. kase .NE. 2) THEN
5655 WRITE(*,*) "Internal error 1 in CMUMPS_PP_SOLVE"
5656 CALL mumps_abort()
5657 ENDIF
5658 IF ( id%MYID .eq. master ) THEN
5659C Define matrix B as follows:
5660C MTYPE=1 => B=A other values B=At
5661C The user asked to solve the system Bx=b
5662C
5663C THEN
5664C KASE = 1........ RW1 = INV(TRANSPOSE(B)) * RW1
5665C KASE = 2........ RW1 = INV(B) * RW1
5666 IF ( mtype .EQ. 1 ) THEN
5667 solvet = kase - 1
5668 ELSE
5669 solvet = kase
5670 END IF
5671C SOLVET= 1 -> solve A x = B, other values solve Atx=b
5672C We force SOLVET to have value either 0 or 1, in order
5673C to be able to test both values, and also, be able to
5674C test whether SOLVET = MTYPE or not.
5675 IF ( solvet.EQ.2 ) solvet = 0
5676 IF ( lscal ) THEN
5677 IF ( solvet .EQ. 1 ) THEN
5678C Apply rowscaling
5679 DO k = 1, id%N
5680 c_y( k ) = c_y( k ) * id%ROWSCA( k )
5681 END DO
5682 ELSE
5683C Apply column scaling
5684 DO k = 1, id%N
5685 c_y( k ) = c_y( k ) * id%COLSCA( k )
5686 END DO
5687 END IF
5688 END IF
5689 END IF ! MYID.EQ.MASTER
5690C ------------------------------
5691C Broadcast SOLVET to the slaves
5692C ------------------------------
5693 CALL mpi_bcast( solvet, 1, mpi_integer, master,
5694 & id%COMM, ierr)
5695C --------------------------------------------
5696C Scatter the right hand side C_Y on all procs
5697C --------------------------------------------
5698 IF ( .NOT.i_am_slave ) THEN
5699C -- Master not working
5700 CALL cmumps_scatter_rhs(id%NSLAVES,id%N, id%MYID,
5701 & id%COMM,
5702 & solvet, c_y(1), id%N, 1,
5703 & 1,
5704 & c_dummy, 1, 1,
5705 & idummy, 0,
5706 & jdummy, id%KEEP(1), id%KEEP8(1), id%PROCNODE_STEPS(1),
5707 & idummy, 1,
5708 & id%STEP(1),
5709 & id%ICNTL(1),id%INFO(1))
5710 ELSE
5711 IF (solvet.EQ.mtype) THEN
5712C POSINRHSCOMP_ROW is with respect to the
5713C original linear system (transposed or not)
5714 ptr_posinrhscomp_fwd => id%POSINRHSCOMP_ROW
5715 ELSE
5716C Transposed, use column indices of original
5717C system (ie, col indices of A or A^T)
5718 ptr_posinrhscomp_fwd => id%POSINRHSCOMP_COL
5719 ENDIF
5720 liw_passed = max( liw, 1 )
5721 CALL cmumps_scatter_rhs(id%NSLAVES,id%N, id%MYID,
5722 & id%COMM,
5723 & solvet, c_y(1), id%N, 1,
5724 & 1,
5725 & id%RHSCOMP(ibeg_rhscomp), ld_rhscomp, 1,
5726 & ptr_posinrhscomp_fwd(1), nb_fs_rhscomp_f,
5727C
5728 & id%PTLUST_S(1), id%KEEP(1), id%KEEP8(1),
5729 & id%PROCNODE_STEPS(1),
5730 & is(1), liw_passed,
5731 & id%STEP(1),
5732 & id%ICNTL(1),id%INFO(1))
5733 ENDIF
5734 IF (info(1).LT.0) GOTO 89
5735C
5736C Solve the system
5737C
5738 IF ( i_am_slave ) THEN
5739 liw_passed = max( liw, 1 )
5740 la_passed = max( la, 1_8 )
5741 IF (solvet.EQ.mtype) THEN
5742 ptr_posinrhscomp_fwd => id%POSINRHSCOMP_ROW
5743 ptr_posinrhscomp_bwd => id%POSINRHSCOMP_COL
5744 ELSE
5745 ptr_posinrhscomp_fwd => id%POSINRHSCOMP_COL
5746 ptr_posinrhscomp_bwd => id%POSINRHSCOMP_ROW
5747 ENDIF
5748 from_pp=.true.
5749 nbsparse_loc = .false.
5750 CALL cmumps_sol_c(id%root, id%N, id%S(1), la_passed, id%IS(1),
5751 & liw_passed,work_wcb(1),lwcb8_sol_c,iwcb,liwcb,nbrhs_eff,id%NA(1),
5752 & id%LNA,id%NE_STEPS(1),srw3,solvet,icntl(1),from_pp,id%STEP(1),
5753 & id%FRERE_STEPS(1),id%DAD_STEPS(1),id%FILS(1),id%PTLUST_S(1),
5754 & id%PTRFAC(1), iwk_solve(1), liwk_solve, ptracb, liwk_ptracb,
5755 & id%PROCNODE_STEPS(1), id%NSLAVES, info(1), keep(1), keep8(1),
5756 & id%DKEEP(1),id%COMM_NODES,id%MYID,id%MYID_NODES, bufr(1), lbufr,
5757 & lbufr_bytes, id%ISTEP_TO_INIV2(1), id%TAB_POS_IN_PERE(1,1),
5758C Next 3 arguments are not used in this call
5759 & ibeg_root_def,iend_root_def,iroot_def_rhs_col1, ptr_rhs_root(1),
5760 & lptr_rhs_root, size_root, master_root, id%RHSCOMP(ibeg_rhscomp),
5761 & ld_rhscomp,ptr_posinrhscomp_fwd(1),ptr_posinrhscomp_bwd(1),
5762 & 1,1,1,1, idummy, 1, jdummy, kdummy, 1, ldummy, 1, mdummy, 1,1,
5763 & nbsparse_loc, ptr_rhs_bounds(1), lptr_rhs_bounds
5764 & , id%IPOOL_B_L0_OMP(1), id%LPOOL_B_L0_OMP, id%IPOOL_A_L0_OMP(1),
5765 & id%LPOOL_A_L0_OMP, id%L_VIRT_L0_OMP, id%VIRT_L0_OMP(1),
5766 & id%L_PHYS_L0_OMP, id%PHYS_L0_OMP(1), id%PERM_L0_OMP(1),
5767 & id%PTR_LEAFS_L0_OMP(1), id%L0_OMP_MAPPING(1), id%LL0_OMP_MAPPING,
5768 & id%L0_OMP_FACTORS(1), id%LL0_OMP_FACTORS
5769 & )
5770 END IF
5771C ------------------
5772C Change error codes
5773C ------------------
5774 IF (info(1).eq.-2) info(1)=-12
5775 IF (info(1).eq.-3) info(1)=-15
5776C
5777 IF (info(1) .GE. 0) THEN
5778C We need a workspace of minimal size KEEP(247)
5779C in order to unpack pieces of the solution during
5780C CMUMPS_GATHER_SOLUTION below
5781C - Avoid allocation if error already occurred.
5782C - DEALLOCATE called after GATHER_SOLUTION
5783C CWORK not needed for AM1
5784 ALLOCATE( cwork(max(max(keep(247),keep(246)),1)),
5785 & stat=allocok)
5786 IF (allocok > 0) THEN
5787 info(1)=-13
5788 info(2)=max(max(keep(247),keep(246)),1)
5789 ENDIF
5790 ENDIF
5791C -------------------------
5792C Propagate possible errors
5793C -------------------------
5794 89 CALL mumps_propinfo( icntl(1), info(1),
5795 & id%COMM,id%MYID)
5796C
5797C Return in case of error.
5798 IF (info(1).LT.0) RETURN
5799C -------------------------------
5800C Assemble the solution on master
5801C -------------------------------
5802C (Note: currently, if this part of code is executed,
5803C then necessarily NBRHS_EFF = 1)
5804C
5805C === GATHER and SCALE solution ==============
5806C
5807 IF ((id%MYID.NE.master).OR. .NOT.lscal) THEN
5808 pt_scaling => dummy_scal
5809 ELSE
5810 IF (solvet.EQ.1) THEN
5811 pt_scaling => id%COLSCA
5812 ELSE
5813 pt_scaling => id%ROWSCA
5814 ENDIF
5815 ENDIF
5816 liw_passed = max( liw, 1 )
5817C Solution computed during CMUMPS_SOL_C has been stored
5818C in id%RHSCOMP and is gathered on the master in C_Y
5819 IF ( .NOT. i_am_slave ) THEN
5820C I did not participate to computing part of the solution
5821C (id%RHSCOMP not set/allocate) : receive solution, store
5822C it and scale it.
5823 CALL cmumps_gather_solution(id%NSLAVES,id%N,
5824 & id%MYID, id%COMM, nbrhs_eff,
5825 & solvet, c_y, id%N, nbrhs_eff, 1,
5826 & jdummy, id%KEEP(1),id%KEEP8(1), id%PROCNODE_STEPS(1),
5827 & idummy, 1,
5828 & id%STEP(1), bufr(1), lbufr, lbufr_bytes,
5829 & cwork(1), size(cwork),
5830 & lscal, pt_scaling(1), size(pt_scaling),
5831! RHSCOMP not on non-working master
5832 & c_dummy, 1 , 1, idummy, 1,
5833! for sparse permuted RHS on host
5834 & perm_rhs, size(perm_rhs)
5835 & )
5836 ELSE
5837 CALL cmumps_gather_solution(id%NSLAVES,id%N,
5838 & id%MYID, id%COMM, nbrhs_eff,
5839 & solvet, c_y, id%N, nbrhs_eff, 1,
5840 & id%PTLUST_S(1), id%KEEP(1),id%KEEP8(1),
5841 & id%PROCNODE_STEPS(1),
5842 & is(1), liw_passed,
5843 & id%STEP(1), bufr(1), lbufr, lbufr_bytes,
5844 & cwork(1), size(cwork),
5845 & lscal, pt_scaling(1), size(pt_scaling),
5846 & id%RHSCOMP(ibeg_rhscomp), ld_rhscomp, nbrhs_eff,
5847 & ptr_posinrhscomp_bwd(1), id%N,
5848 & perm_rhs, size(perm_rhs)) ! for sparse permuted RHS on host
5849 ENDIF
5850 DEALLOCATE( cwork )
5851 END SUBROUTINE cmumps_pp_solve
float cmplx[2]
Definition pblas.h:136
subroutine cmumps_check_redrhs(id)
subroutine cmumps_set_k221(id)
subroutine cmumps_check_dense_rhs(idrhs, idinfo, idn, idnrhs, idlrhs)
subroutine cmumps_permute_rhs_gs(lp, lpok, prokg, mpg, perm_strat, sym_perm, n, nrhs, irhs_ptr, size_irhs_ptr, irhs_sparse, nzrhs, perm_rhs, ierr)
subroutine cmumps_interleave_rhs_am1(perm_rhs, size_perm, iptr_working, size_iptr_working, working, size_working, irhs_ptr, step, sym_perm, n, nbrhs, procnode, nsteps, slavef, keep199, behaviour_l0, reorder, n_select, prokg, mpg)
subroutine cmumps_permute_rhs_am1(perm_strat, sym_perm, irhs_ptr, nhrs, perm_rhs, sizeperm, ierr)
subroutine cmumps_get_ns_options_solve(icntl, keep, nrhs, mpg, info)
subroutine cmumps_eltyd(mtype, n, nelt, eltptr, leltvar, eltvar, na_elt8, a_elt, saverhs, x, y, w, k50)
Definition csol_aux.F:654
subroutine cmumps_sol_x(a, nz8, n, irn, icn, z, keep, keep8, eff_size_schur, sym_perm)
Definition csol_aux.F:88
subroutine cmumps_sol_lcond(n, rhs, x, y, d, r_w, c_w, iw, kase, omega, erx, cond, lp, keep, keep8)
Definition csol_aux.F:940
subroutine cmumps_build_mapping_info(id)
Definition csol_aux.F:776
subroutine cmumps_sol_y(a, nz8, n, irn, icn, rhs, x, r, w, keep, keep8)
Definition csol_aux.F:224
subroutine cmumps_sol_q(mtype, iflag, n, lhs, wrhs, w, res, givnorm, anorm, xnorm, sclnrm, mprint, icntl, keep, keep8)
Definition csol_aux.F:1090
subroutine cmumps_eltqd2(mtype, n, nelt, eltptr, leltvar, eltvar, na_elt8, a_elt, lhs, wrhs, w, rhs, keep, keep8)
Definition csol_aux.F:510
subroutine cmumps_sol_x_elt(mtype, n, nelt, eltptr, leltvar, eltvar, na_elt8, a_elt, w, keep, keep8)
Definition csol_aux.F:530
subroutine cmumps_qd2(mtype, n, nz8, aspk, irn, icn, lhs, wrhs, w, rhs, keep, keep8)
Definition csol_aux.F:423
subroutine cmumps_set_scaling_loc(scaling_data, n, iloc, liloc, comm, myid, i_am_slave, master, nb_bytes, nb_bytes_max, k16_8, lp, lpok, icntl, info)
Definition csol_aux.F:1662
subroutine cmumps_sol_omega(n, rhs, x, y, r_w, c_w, iw, iflag, omega, noiter, testconv, lp, arret, grain)
Definition csol_aux.F:862
subroutine cmumps_gather_solution_am1(nslaves, n, myid, comm, nrhs, rhscomp, lrhscomp, nrhscomp_col, keep, buffer, size_buf, size_buf_bytes, lscal, scaling, lscaling, irhs_ptr_copy, lirhs_ptr_copy, irhs_sparse_copy, lirhs_sparse_copy, rhs_sparse_copy, lrhs_sparse_copy, uns_perm_inv, luns_perm_inv, posinrhscomp, lpos_row, nb_fs_in_rhscomp)
Definition csol_c.F:1448
subroutine cmumps_build_posinrhscomp_am1(nslaves, n, myid_nodes, ptrist, dad, keep, keep8, procnode_steps, iw, liw, step, posinrhscomp_row, posinrhscomp_col, posinrhscomp_col_alloc, mtype, irhs_ptr, nbcol_inbloc, irhs_sparse, nz_rhs, perm_rhs, size_perm_rhs, jbeg_rhs, nbent_rhscomp, nb_fs_in_rhscomp_fwd, nb_fs_in_rhscomp_tot, uns_perm_inv, size_uns_perm_inv)
Definition csol_c.F:2173
subroutine cmumps_distsol_indices(mtype, isol_loc, ptrist, keep, keep8, iw, liw_passed, myid_nodes, n, step, procnode, nslaves, scaling_data, lscal, irhs_loc_meaningful, irhs_loc, nloc_rhs)
Definition csol_c.F:1644
subroutine cmumps_build_posinrhscomp(nslaves, n, myid_nodes, ptrist, keep, keep8, procnode_steps, iw, liw, step, posinrhscomp_row, posinrhscomp_col, posinrhscomp_col_alloc, mtype, nbent_rhscomp, nb_fs_in_rhscomp)
Definition csol_c.F:2067
subroutine cmumps_distributed_solution(slavef, n, myid_nodes, mtype, rhscomp, lrhscomp, nbrhs_eff, posinrhscomp, isol_loc, sol_loc, nrhs, beg_rhs, lsol_loc, ptrist, procnode_steps, keep, keep8, iw, liw, step, scaling_data, lscal, nb_rhsskipped, perm_rhs, size_perm_rhs)
Definition csol_c.F:1719
subroutine cmumps_scatter_dist_rhs(nslaves, n, myid_nodes, comm_nodes, nrhs_col, nrhs_loc, lrhs_loc, map_rhs_loc, irhs_loc, rhs_loc, rhs_loc_size, rhscomp, ld_rhscomp, posinrhscomp_fwd, nb_fs_in_rhscomp, lscal, scaling_data_dr, lp, lpok, keep, nb_bytes_loc, info)
subroutine cmumps_check_distrhs(idnloc_rhs, idlrhs_loc, nrhs, idirhs_loc, idrhs_loc, info)
subroutine cmumps_pp_solve()
subroutine cmumps_loc_mv8(n, nz_loc8, irn_loc, jcn_loc, a_loc, x, y_loc, ldlt, mtype)
subroutine cmumps_loc_omega1(n, nz_loc8, irn_loc, jcn_loc, a_loc, x, y_loc, ldlt, mtype)
subroutine cmumps_size_in_struct(id, nb_int, nb_cmplx, nb_char)
Definition ctools.F:1673
#define min(a, b)
Definition macros.h:20
subroutine mpi_recv(buf, cnt, datatype, source, tag, comm, status, ierr)
Definition mpi.f:461
subroutine mpi_reduce(sendbuf, recvbuf, cnt, datatype, op, root, comm, ierr)
Definition mpi.f:120
double precision function mpi_wtime()
Definition mpi.f:561
subroutine mpi_send(buf, cnt, datatype, dest, tag, comm, ierr)
Definition mpi.f:480
subroutine, public cmumps_buf_deall_small_buf(ierr)
subroutine, public cmumps_buf_deall_cb(ierr)
subroutine, public cmumps_buf_alloc_cb(size, ierr)
subroutine, public cmumps_buf_alloc_small_buf(size, ierr)
subroutine, public cmumps_blr_struc_to_mod(id_blrarray_encoding)
subroutine, public cmumps_blr_mod_to_struc(id_blrarray_encoding)
subroutine, public cmumps_ooc_init_solve(id)
Definition cmumps_ooc.F:593
subroutine, public cmumps_ooc_end_solve(ierr)
subroutine cmumps_init_fact_area_size_s(la)
Definition cmumps_ooc.F:109
subroutine cmumps_compute_memory_save(id, total_file_size, total_struc_size)
integer(8), public pruned_size_loaded
subroutine, public cmumps_sol_es_init(size_of_block_arg, keep201)
subroutine cmumps_sol_l0omp_li(k400)
Definition csol_omp_m.F:21
subroutine cmumps_sol_l0omp_ld(k400)
Definition csol_omp_m.F:34
subroutine, public mumps_fdm_mod_to_struc(what, id_fdm_encoding, info)
subroutine, public mumps_fdm_struc_to_mod(what, id_fdm_encoding)
subroutine mumps_sol_rhsmapinfo(n, nloc_rhs, info23, irhs_loc, map_rhs_loc, posinrhscomp_fwd, nslaves, myid_nodes, comm_nodes, icntl, info)
Definition sol_common.F:106
subroutine arret(nn)
Definition arret.F:87
subroutine mumps_secfin(t)
integer function mumps_procnode(procinfo_inode, k199)
subroutine mumps_set_ierror(size8, ierror)
subroutine mumps_mem_centralize(myid, comm, info, infog, irank)
subroutine mumps_secdeb(t)
void file_size(int *filesize)