228 1 NDDL ,NNZ ,IADK ,JDIK ,DIAG_K ,
229 2 LT_K ,NDDLI ,ITOK ,IADI ,JDII ,
230 3 LT_I ,NNZM ,IADM ,JDIM ,DIAG_M ,
231 4 LT_M ,X ,R ,ITOL ,TOL ,
232 5 P ,Z ,Y ,ITASK ,IPRINT ,
233 6 N_MAX ,EPS_M ,F_X ,ISTOP ,W_DDL ,
234 8 A ,AR ,VE ,MS ,XE ,
235 9 D ,DR ,NDOF ,IPARI ,INTBUF_TAB,
236 A NUM_IMP,NS_IMP,NE_IMP,NSREM ,
237 B NSL ,NMONV ,IMONV ,MONVOL,IGRSURF ,
238 C VOLMON,FR_MV ,IBFV ,SKEW ,
239 D XFRAME ,GRAPHE,IAD_ELEM,FR_ELEM,ITAB ,
240 E INSOLV ,ITN ,FAC_K ,IPIV_K,NK ,
241 F MUMPS_PAR,CDDLP,ISOLV,IDSC ,IDDL ,
242 G IKC ,INLOC ,IND_IMP,XI_C ,R0 ,
243 H NDDLI_G,INTP_C,IRBE3 ,LRBE3 ,IRBE2 ,
258#include "implicit_f.inc"
262#include "comlock.inc"
263#include "com04_c.inc"
264#include "units_c.inc"
267#include "dmumps_struc.h"
274 . nddli ,itok(*) ,iadi(*),jdii(*),n_max,
275 . nnzm ,iprec,itask,iprint,
276 . istop,w_ddl(*),ibfv(*),intp_c,irbe3(*) ,lrbe3(*),
278 INTEGER NDOF(*),NE_IMP(*),NSREM ,NSL,
279 . ipari(*) ,num_imp(*),ns_imp(*) ,ind_imp(*)
280 INTEGER NMONV,IMONV(*),MONVOL(*),FR_MV(*)
281 INTEGER (2,*), FR_ELEM(*), ITAB(*),
282 . INSOLV, ITN, IPIV_K(*), NK, CDDLP(*), ISOLV, IDSC,
283 . IDDL(*), IKC(*), INLOC(*),NDDLI_G, MICID
285 . LT_I(*), TOL, EPS_M, F_X, XI_C(*)
287 INTEGER ,
target,
intent(inout) ::
288 . IADK(NDDL+1) ,JDIK(NNZ), IADM(NDDL+1), JDIM(NNZM)
289 my_real ,
target,
intent(inout) ::
290 . DIAG_K(NDDL), LT_K(NNZ) ,DIAG_M(NDDL),LT_M(NNZM) ,X(NDDL),
291 . P(NDDL) ,Z(NDDL) ,R(NDDL) ,Y(NDDL)
294 . IADK(*) ,JDIK(*), IADM(*),JDIM(*)
296 . DIAG_K(*), LT_K(*) ,DIAG_M(*),LT_M(*) ,X(*),
297 . P(*) ,Z(*) ,R(*) ,Y(*)
300 . a(3,*),ar(3,*),ve(3,*),d(3,*),dr(3,*),xe(3,*),
301 . ms(*),volmon(*),skew(*) ,xframe(*),fac_k(*),
306 TYPE(dmumps_struc) MUMPS_PAR
312 TYPE(intbuf_struct_) INTBUF_TAB(*)
313 TYPE (SURF_) ,
DIMENSION(NSURF) :: IGRSURF
323 INTEGER I,J,IT,IP,,ND,IERR,IPRI,IERROR,NNZI,LOC_PROC,
329 . s , r2, r02,
alpha,beta,g0,g1,rr,tols,toln,tols2
330 INTEGER CRIT_STOP,IUPD,IUPD0,F_DDL,L_DDL,IFLG,GPUR4R8,
331 . itp,lcom,nddl1,lcomi,lcomx,k,
332 . ntag(nddl),igather(nddl)
336 . anorm2,xnorm2,l_a,l_b2,l_b,a_old,b_old,tmp,r2_old,rmax
338 . cs,dbar, delta, denom, kcond,snprod,qrnorm,
339 . gamma, gbar, gmax, gmin, epsln,lqnorm,diag,cgnorm,
340 . oldb, rhs1, rhs2,sn, zbar, zl ,oldb2,tnorm2,eps(4)
344 real*4,
DIMENSION(:),
ALLOCATABLE ::
345 . lt_k_sp, lt_k0_sp, lt_m_sp, lt_m0_sp, diag_k_sp, diag_m_sp,
346 . x_sp, r_sp,w_sp, lt_i0_sp
348 my_real,
DIMENSION(:),
ALLOCATABLE :: wr
349 INTEGER,
DIMENSION(:),
ALLOCATABLE :: IFRTMP, TABLE, INDTMP
351 my_real,
DIMENSION(:),
ALLOCATABLE :: w, wr, vgat, vsca, xir, yir
352 INTEGER,
DIMENSION(:),
ALLOCATABLE :: IFRTMP, INDEX, TABLE, INDTMP
369 DATA warr /
'**WARRING**'/
386 lcom = lcom +
nd_fr(i)
441 f_ddl=1+itask*nddl/nthread
442 l_ddl=(itask+1)*nddl/nthread
450 IF (ispmd/=0) ipri = 0
454 1 nddl,x,r,diag_k,diag_m,
455 2 nnz,lt_k,lt_k0,lt_m,lt_m0,
458 5 nlim,itol,eps_m,iprec)
460 1 nddl,r,diag_k,nnz,lt_k,
465 WRITE(iout,*)
' *** BEGIN CONJUGATE GRADIENT ITERATION ***'
469 WRITE(istdo,*)
' *** BEGIN CONJUGATE GRADIENT ITERATION ***'
492 1 graphe,iad_elem,fr_elem,diag_k,lt_k ,
493 2 iadk ,jdik ,itab ,ipri,insolv ,
494 3 itn ,fac_k , ipiv_k, nk ,mumps_par,
495 4 cddlp ,isolv , idsc , iddl ,ikc ,
496 5 inloc ,ndof , nddl ,nnzm ,iadm ,
497 6 jdim ,diag_m , lt_m ,r ,z ,
505 CALL produt_h(f_ddl ,l_ddl ,r ,z ,w_ddl , g0 ,itask )
508 1 nddl ,nddli ,iadk ,jdik ,diag_k,
509 2 lt_k ,iadi ,jdii ,itok ,lt_i ,
511 5 ve ,ms ,xe ,d ,dr ,
512 6 ndof ,ipari ,intbuf_tab ,num_imp,
513 7 ns_imp,ne_imp,nsrem ,nsl ,ibfv ,
514 8 skew ,xframe,monvol,volmon,igrsurf ,
515 9 fr_mv,nmonv ,imonv ,ind_imp,
516 a xi_c ,iupd ,irbe3 ,lrbe3 ,irbe2 ,
517 b lrbe2 ,f_ddl ,l_ddl ,itask )
521 CALL produt_h(f_ddl ,l_ddl ,p ,y ,w_ddl , s ,itask )
535 CALL produt_h( f_ddl ,l_ddl ,r ,r ,w_ddl , r02 ,itask )
538 ELSEIF (itol==3)
THEN
550 ELSEIF (itol==4)
THEN
563 IF(ipri/=0.AND.itask==0)
THEN
565 IF(ipri<0)
WRITE(istdo,1000)it,rr
566 WRITE(iout,1000)it,rr
574 x(i) = x(i) +
alpha*p(i)
575 r(i) = r(i) -
alpha*y(i)
581 1 graphe,iad_elem,fr_elem,diag_k,lt_k ,
582 2 iadk ,jdik ,itab ,ipri,insolv ,
583 3 itn ,fac_k , ipiv_k, nk ,mumps_par,
584 4 cddlp ,isolv , idsc , iddl ,ikc ,
585 5 inloc ,ndof , nddl ,nnzm ,iadm ,
586 6 jdim ,diag_m , lt_m ,r ,z ,
591 CALL produt_h( f_ddl ,l_ddl ,r ,z ,w_ddl , g1 ,itask )
595 CALL produt_h( f_ddl ,l_ddl ,r ,r ,w_ddl , r2 ,itask )
596 ELSEIF (itol==3)
THEN
599 l_b2=abs(beta)*a_old*a_old
608 gmax = abs( l_a ) + eps_m
612 ELSEIF (itol==4)
THEN
618 IF (itol==3) toln=toln*tnorm2
623 istop=crit_stop(it,r2,nlim,toln)
624#include "lockoff.inc"
625 IF(nddli_g>0.AND.intp_c==-1)iupd0
628 p(i) = z(i) + beta*p(i)
630 IF(iupd0>0.AND.it>nddli_g/2) iupd = iupd0
635 1 nddl ,nddli ,iadk ,jdik ,diag_k,
636 2 lt_k ,iadi ,jdii ,itok ,lt_i ,
638 5 ve ,ms ,xe ,d ,dr ,
639 6 ndof ,ipari ,intbuf_tab ,num_imp,
640 7 ns_imp,ne_imp,nsrem ,nsl ,ibfv ,
641 8 skew ,xframe,monvol,volmon,igrsurf ,
642 9 fr_mv,nmonv ,imonv ,ind_imp,
643 a xi_c ,iupd ,irbe3 ,lrbe3 ,irbe2 ,
644 b lrbe2 ,f_ddl ,l_ddl ,itask )
648 CALL produt_h(f_ddl ,l_ddl ,p ,y ,w_ddl , s ,itask )
652 x(i) = x(i) +
alpha*p(i)
653 r(i) = r(i) -
alpha*y(i)
659 1 graphe,iad_elem,fr_elem,diag_k,lt_k ,
660 2 iadk ,jdik ,itab ,ipri,insolv ,
661 3 itn ,fac_k , ipiv_k, nk ,mumps_par,
662 4 cddlp ,isolv , idsc , iddl ,ikc ,
663 5 inloc ,ndof , nddl ,nnzm ,iadm ,
664 6 jdim ,diag_m , lt_m ,r ,z ,
669 CALL produt_h(f_ddl ,l_ddl ,r ,z ,w_ddl , g1
674 CALL produt_h( f_ddl ,l_ddl ,r ,r ,w_ddl , r2 ,itask )
675 ELSEIF (itol==3)
THEN
685 tnorm2=tnorm2+l_a*l_a+oldb2+l_b2
686 gamma = sqrt( gbar*gbar + oldb2 )
689 delta = cs * dbar + sn * l_a
690 gbar = sn * dbar - cs * l_a
694 xnorm2 = xnorm2+zl*zl
695 gmax =
max( gmax, gamma )
696 gmin =
min( gmin, gamma )
697 rhs1 = rhs2 - delta * zl
699 toln=tols2*anorm2*xnorm2
702 ELSEIF (itol==4)
THEN
721 IF(ipri/=0.AND.itask==0)
THEN
722 IF(mod(it,ip)==0)
THEN
724 WRITE(iout,1001)it,rr
725 IF(ipri<0)
WRITE(istdo,1001)it,rr
732 istop=crit_stop(it,r2,nlim,toln)
733#include "lockoff.inc"
735 IF((iupd>0.AND.it==nlim).OR.
736 . (iupd==0.AND.istop/=1.AND.iupd0>0))
THEN
742#include "lockoff.inc"
752 1 nddl ,nddli ,iadk ,jdik ,diag_k,
753 2 lt_k ,iadi ,jdii ,itok ,lt_i ,
755 5 ve ,ms ,xe ,d ,dr ,
756 6 ndof ,ipari ,intbuf_tab ,num_imp,
757 7 ns_imp,ne_imp,nsrem ,nsl ,ibfv ,
758 8 skew ,xframe,monvol,volmon,igrsurf ,
759 9 fr_mv,nmonv ,imonv ,ind_imp,
760 a xi_c ,iupd ,irbe3 ,lrbe3 ,irbe2 ,
761 b lrbe2 ,f_ddl ,l_ddl ,itask )
772 1 graphe,iad_elem,fr_elem,diag_k,lt_k ,
773 2 iadk ,jdik ,itab ,ipri,insolv ,
774 3 itn ,fac_k , ipiv_k, nk
775 4 cddlp ,isolv , idsc , iddl ,ikc
776 5 inloc ,ndof , nddl ,nnzm ,iadm ,
777 6 jdim ,diag_m , lt_m ,r ,z ,
782 CALL produt_h( f_ddl ,l_ddl ,r ,z ,w_ddl , g0 ,itask )
797 gmax = abs( l_a ) + eps_m
801 ELSEIF (itol==4)
THEN
816 ELSEIF(gpur4r8==2)
THEN
827 . nspmd,loc_proc,l_spmd(loc_proc),idevice,micid,ierr)
830 CALL ancmsg(msgid=223,anmode=aninfo_blind)
832 CALL ancmsg(msgid=224,anmode=aninfo_blind)
834 CALL ancmsg(msgid=225,anmode=aninfo_blind)
839 print *,
' INIT MIC CARD NUMBER ',micid
855 i= omp_get_num_threads()
856 print *,
'Number of threads (host):',i
858 i= kmp_get_blocktime_target(target_mic, micid)
862!dec$ attributes offload: mic :: mic_dcopy
866 & in(lt_k:length(nnz), free_if(.false.), align(512))
870 print *,
' Variables1= ',nnz
872 & in(jdik:length(nnz), free_if(.false.), align(512))
876 print *,
' Variables2= ',nnz
878 & in(lt_k0:length(nnz), free_if(.false.), align(512))
882 print *,
' Variables3= ',nnz
884 & in(
jdik0:length(nnz), free_if(.false.), align(512))
888 print *,
' Variables4= ',nnz
891& in(lt_m:length(nnzm), free_if(.false.), align(512))
895 print *,
' Variables5= ',nnzm
897 & in(jdim:length(nnzm), free_if(.false.), align(512))
901 print *,
' Variables6= ',nnzm
903 & in(lt_m0:length(nnzm), free_if(.false.), align(512))
907 print *,
' Variables7= ',nnzm
908!dir$ omp offload target(mic:micid)
909 & in(
jdim0:length(nnzm), free_if(.false.), align(512))
913 print *,
' Variables8= ',nnzm
916 & in(diag_k,diag_m,x,y,z,r,p:length(nddl),
921 print *,
' Variables9= ',nddl*7
924 & free_if(.false.), align(512))
928 print *,
' Variables10= ',nddl1*4
929 tf=omp_get_wtime()-tf
930 print *,
'Transfer Time CPU => MIC (s) =',tf
931 print *,
'Transfer Rate CPU => MIC (MB/s)=',
932 . ((tb/(1024*1024))*8)/tf
934 ALLOCATE(w(nddl),stat=ierror)
936 ALLOCATE(vgat(lcom),stat=ierr)
938 ALLOCATE(vsca(lcom),stat=ierr)
940 ALLOCATE(index(lcom),stat=ierr)
942 ALLOCATE(table(nddl),stat=ierr)
966 & in(w:length(nddl), free_if(.false.), align(512))
969 print *,
' Variables11= ',i
971 & in(
ifr2k,index:length(lcom), free_if(.false.))
975 print *,' variables12=
',i
979!dir$ omp offload target(mic:MICID)
980 & nocopy(IADK,IADK0,IADM,IADM0,LT_K,LT_K0,
981 & lt_m,lt_m0,JDIK,JDIK0,JDIM,JDIM0,
982 & DIAG_K,DIAG_M,x,y,z,r,p,w)
984!$OMP PARALLEL default(shared)
989 i= omp_get_num_threads()
990 print *,'number of threads(mic):
',i
998 CALL MIC_MV(NDDL,X,DIAG_K,Z)
1001 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K ,IADK ,JDIK )
1003 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K0,IADK0,JDIK0)
1009 CALL MIC_DAXPY(NDDL, -ONE, Z, R)
1019 CALL MIC_DCOPY(NDDL, R, Z)
1020 ELSEIF(IPREC == 2)THEN
1022 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1023 ELSEIF(IPREC == 5)THEN
1025 CALL MIC_DCOPY(NDDL, R, Y)
1027 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1029 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1031 CALL MIC_DCOPY(NDDL, Z, Y)
1033 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1039.AND.
IF(NSPMD > 1 IPREC > 1)THEN
1040 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1041!dir$ omp offload target(mic:MICID)
1043 & out(vgat:length(lcom))
1044!$OMP PARALLEL default(shared)
1045 CALL MIC_GATHER(NDDL, LCOM, Z, VGAT, IFR2K)
1047 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1048 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1049 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1050 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1051 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1052!dir$ omp offload target(mic:MICID)
1053 & nocopy(z,ifr2k,index)
1054 & in(vsca:length(lcom))
1055!$OMP PARALLEL default(shared)
1056 CALL MIC_SCATTER(NDDL, LCOM, Z, VSCA, IFR2K, INDEX)
1058 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1061!dir$ omp offload target(mic:MICID)
1063!$OMP PARALLEL default(shared)
1065 CALL MIC_DCOPY(NDDL, Z, P)
1069!dir$ omp offload target(mic:MICID)
1072!$OMP PARALLEL default(shared) shared(g0)
1073 CALL MIC_DDOT(NDDL, R, Z, G0)
1078!dir$ omp offload target(mic:MICID)
1081!$OMP PARALLEL default(shared) shared(g0)
1082 CALL MIC_MV(NDDL,Z,W,Y)
1083 CALL MIC_DDOT(NDDL, R, Y, G0)
1085 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1087 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1090!dir$ omp offload target(mic:MICID)
1091 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1092 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1093!$OMP PARALLEL default(shared)
1100 CALL MIC_MV(NDDL,P,DIAG_K,Y)
1102 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
1104 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
1112 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1113!dir$ omp offload target(mic:MICID)
1115 & out(vgat:length(lcom))
1116!$OMP PARALLEL default(shared)
1117 CALL MIC_GATHER(NDDL, LCOM, Y, VGAT, IFR2K)
1119 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1120 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1121 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1122 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1123 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1124!dir$ omp offload target(mic:MICID)
1125 & nocopy(y,ifr2k,index)
1126 & in(vsca:length(lcom))
1127!$OMP PARALLEL default(shared)
1128 CALL MIC_SCATTER(NDDL, LCOM, Y, VSCA, IFR2K, INDEX)
1130 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1134!dir$ omp offload target(mic:MICID)
1137!$OMP PARALLEL default(shared) shared(s)
1138 CALL MIC_DDOT(NDDL, P, Y, S)
1142!dir$ omp offload target(mic:MICID)
1145!$OMP PARALLEL default(shared) shared(s)
1146 CALL MIC_MV(NDDL,Y,W,Z)
1147 CALL MIC_DDOT(NDDL, P, Z, S)
1149 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1151 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1168!dir$ omp offload target(mic:MICID)
1171!$OMP PARALLEL default(shared) shared(r02)
1172 CALL MIC_DDOT(NDDL, R, R, R02)
1176!dir$ omp offload target(mic:MICID)
1179!$OMP PARALLEL default(shared) shared(r02)
1180 CALL MIC_MV(NDDL,R,W,Z)
1181 CALL MIC_DDOT(NDDL, Z, Z, R02)
1183 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1184 CALL SPMD_SUM_S(R02)
1185 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1190 ELSEIF (ITOL==3) THEN
1202 ELSEIF (ITOL==4) THEN
1203 R02=ALPHA*ALPHA*ABS(G0)
1215.AND.
IF(IPRI/=0ITASK==0)THEN
1217 IF(IPRI<0) WRITE(ISTDO,1000)IT,RR
1218 WRITE(IOUT,1000)IT,RR
1225!dir$ omp offload target(mic:MICID)
1226 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1227 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1228!$OMP PARALLEL default(shared)
1229 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
1230 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
1240 CALL MIC_DCOPY(NDDL, R, Z)
1241 ELSEIF(IPREC == 2)THEN
1243 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1244 ELSEIF(IPREC == 5)THEN
1246 CALL MIC_DCOPY(NDDL, R, Y)
1248 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1250 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1251 CALL MIC_DCOPY(NDDL, Z, Y)
1253 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1258.AND.
IF(NSPMD > 1 IPREC > 1)THEN
1259 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1260!dir$ omp offload target(mic:MICID)
1262 & out(vgat:length(lcom))
1263!$OMP PARALLEL default(shared)
1264 CALL MIC_GATHER(NDDL, LCOM, Z, VGAT, IFR2K)
1266 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1267 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1268 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1269 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1270 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1271!dir$ omp offload target(mic:MICID)
1272 & nocopy(z,ifr2k,index)
1273 & in(vsca:length(lcom))
1274!$OMP PARALLEL default(shared)
1275 CALL MIC_SCATTER(NDDL, LCOM, Z, VSCA, IFR2K, INDEX)
1277 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1281!dir$ omp offload target(mic:MICID)
1284!$OMP PARALLEL default(shared) shared(G1)
1285 CALL MIC_DDOT(NDDL, R, Z, G1)
1289!dir$ omp offload target(mic:MICID)
1292!$OMP PARALLEL default(shared) shared(G1)
1293 CALL MIC_MV(NDDL,Z,W,Y)
1294 CALL MIC_DDOT(NDDL, R, Y, G1)
1296 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1298 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1304!dir$ omp offload target(mic:MICID)
1307!$OMP PARALLEL default(shared) shared(R2)
1308 CALL MIC_DDOT(NDDL, R, R, R2)
1312!dir$ omp offload target(mic:MICID)
1315!$OMP PARALLEL default(shared) shared(R2)
1316 CALL MIC_MV(NDDL,R,W,Y)
1317 CALL MIC_DDOT(NDDL, Y, Y, R2)
1319 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1321 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1323 ELSEIF (ITOL==3) THEN
1326 L_B2=ABS(BETA)*A_OLD*A_OLD
1335 GMAX = ABS( L_A ) + EPS_M
1339 ELSEIF (ITOL==4) THEN
1345 IF (ITOL==3) TOLN=TOLN*TNORM2
1349 ISTOP=CRIT_STOP(IT,R2,NLIM,TOLN)
1357!dir$ omp offload target(mic:MICID)
1358 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1359 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1360!$OMP PARALLEL default(shared)
1363 CALL MIC_DSCAL(NDDL, BETA, P)
1364 CALL MIC_DAXPY(NDDL, ONE, Z, P)
1372 CALL MIC_MV(NDDL,P,DIAG_K,Y)
1374 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
1376 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
1383 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1384!dir$ omp offload target(mic:MICID)
1386 & out(vgat:length(lcom))
1387!$OMP PARALLEL default(shared)
1388 CALL MIC_GATHER(NDDL, LCOM, Y, VGAT, IFR2K)
1390 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1391 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1392 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1393 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1394 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1395!dir$ omp offload target(mic:MICID)
1396 & nocopy(y,ifr2k,index)
1397 & in(vsca:length(lcom))
1398!$OMP PARALLEL default(shared)
1399 CALL MIC_SCATTER(NDDL, LCOM, Y, VSCA, IFR2K, INDEX)
1401 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1405!dir$ omp offload target(mic:MICID)
1408!$OMP PARALLEL default(shared) shared(S)
1409 CALL MIC_DDOT(NDDL, P, Y, S)
1413!dir$ omp offload target(mic:MICID)
1416!$OMP PARALLEL default(shared) shared(S)
1417 CALL MIC_MV(NDDL,Y,W,Z)
1418 CALL MIC_DDOT(NDDL, P, Z, S)
1420 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1422 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1426!dir$ omp offload target(mic:MICID)
1427 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1428 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1429!$OMP PARALLEL default(shared)
1430 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
1431 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
1438 CALL MIC_DCOPY(NDDL, R, Z)
1439 ELSEIF(IPREC == 2)THEN
1441 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1442 ELSEIF(IPREC == 5)THEN
1444 CALL MIC_DCOPY(NDDL, R, Y)
1446 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1448 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1449 CALL MIC_DCOPY(NDDL, Z, Y)
1451 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1456.AND.
IF(NSPMD > 1 IPREC > 1)THEN
1457 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1458!dir$ omp offload target(mic:MICID)
1460 & out(vgat:length(lcom))
1461!$OMP PARALLEL default(shared)
1462 CALL MIC_GATHER(NDDL, LCOM, Z, VGAT, IFR2K)
1464 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1465 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1466 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1467 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1468 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1469!dir$ omp offload target(mic:MICID)
1470 & nocopy(z,ifr2k,index)
1471 & in(vsca:length(lcom))
1472!$OMP PARALLEL default(shared)
1473 CALL MIC_SCATTER(NDDL, LCOM, Z, VSCA, IFR2K, INDEX)
1475 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1479!dir$ omp offload target(mic:MICID)
1482!$OMP PARALLEL default(shared) shared(G1)
1483 CALL MIC_DDOT(NDDL, R, Z, G1)
1487!dir$ omp offload target(mic:MICID)
1490!$OMP PARALLEL default(shared) shared(G1)
1491 CALL MIC_MV(NDDL,Z,W,Y)
1492 CALL MIC_DDOT(NDDL, R, Y, G1)
1494 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1496 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1505!dir$ omp offload target(mic:MICID)
1508!$OMP PARALLEL default(shared) shared(R2)
1509 CALL MIC_DDOT(NDDL, R, R, R2)
1513!dir$ omp offload target(mic:MICID)
1516!$OMP PARALLEL default(shared) shared(R2)
1517 CALL MIC_MV(NDDL,R,W,Y)
1518 CALL MIC_DDOT(NDDL, Y, Y, R2)
1520 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1522 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1524 ELSEIF (ITOL==3) THEN
1534 TNORM2=TNORM2+L_A*L_A+OLDB2+L_B2
1535 GAMMA = SQRT( GBAR*GBAR + OLDB2 )
1538 DELTA = CS * DBAR + SN * L_A
1539 GBAR = SN * DBAR - CS * L_A
1543 XNORM2 = XNORM2+ZL*ZL
1544 GMAX = MAX( GMAX, GAMMA )
1545 GMIN = MIN( GMIN, GAMMA )
1546 RHS1 = RHS2 - DELTA * ZL
1548 TOLN=TOLS2*ANORM2*XNORM2
1551 ELSEIF (ITOL==4) THEN
1552 TMP=ALPHA*ALPHA*ABS(G1)
1569.AND.
IF(IPRI/=0ITASK==0)THEN
1570 IF(MOD(IT,IP)==0)THEN
1572 WRITE(IOUT,1001)IT,RR
1573 IF(IPRI<0) WRITE(ISTDO,1001)IT,RR
1579 ISTOP=CRIT_STOP(IT,R2,NLIM,TOLN)
1587 WRITE(IOUT,1001)IT-1,RR
1592 tt=OMP_GET_WTIME()-tt
1593 i= omp_get_num_threads()
1596 print *,' execution time on mic with
',i,' threads
'
1597 print *,' ==>
',tt,' seconds
'
1606 ELSE ! CODE NSPMD = 1
1608!dec$ attributes offload: mic :: ONE,ZERO,ISTDO,IOUT
1610!dir$ omp offload target(mic:MICID)
1611 & nocopy(IADK,IADK0,IADM,IADM0,LT_K,LT_K0,
1612 & lt_m,lt_m0,JDIK,JDIK0,JDIM,JDIM0,
1613 & DIAG_K,DIAG_M,x,y,z,r,p,w)
1616!$OMP PARALLEL default(shared)
1620 i= omp_get_num_threads()
1621 print *,'number of threads(mic):
',i
1629 CALL MIC_MV(NDDL,X,DIAG_K,Z)
1632 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K ,IADK ,JDIK )
1634 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K0,IADK0,JDIK0)
1640 CALL MIC_DAXPY(NDDL, -ONE, Z, R)
1649 CALL MIC_DCOPY(NDDL, R, Z)
1650 ELSEIF(IPREC == 2)THEN
1652 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1653 ELSEIF(IPREC == 5)THEN
1655 CALL MIC_DCOPY(NDDL, R, Y)
1657 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1659 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1661 CALL MIC_DCOPY(NDDL, Z, Y)
1663 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1695 CALL MIC_DCOPY(NDDL, Z, P)
1703 CALL MIC_DDOT(NDDL, R, Z, G0)
1730 CALL MIC_MV(NDDL,P,DIAG_K,Y)
1732 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
1734 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
1768 CALL MIC_DDOT(NDDL, P, Y, S)
1802 CALL MIC_DDOT(NDDL, R, R, R02)
1820 ELSEIF (ITOL==3) THEN
1836 ELSEIF (ITOL==4) THEN
1837 R02=ALPHA*ALPHA*ABS(G0)
1851.AND.
IF(IPRI/=0ITASK==0)THEN
1853 IF(IPRI<0) WRITE(ISTDO,1000)IT,RR
1854 WRITE(IOUT,1000)IT,RR
1867 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
1868 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
1878 CALL MIC_DCOPY(NDDL, R, Z)
1879 ELSEIF(IPREC == 2)THEN
1881 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1882 ELSEIF(IPREC == 5)THEN
1884 CALL MIC_DCOPY(NDDL, R, Y)
1886 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1888 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1889 CALL MIC_DCOPY(NDDL, Z, Y)
1891 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1923 CALL MIC_DDOT(NDDL, R, Z, G1)
1946 CALL MIC_DDOT(NDDL, R, R, R2)
1961 ELSEIF (ITOL==3) THEN
1966 L_B2=ABS(BETA)*A_OLD*A_OLD
1975 GMAX = ABS( L_A ) + EPS_M
1981 ELSEIF (ITOL==4) THEN
1989 IF (ITOL==3) TOLN=TOLN*TNORM2
2019 CALL MIC_DSCAL(NDDL, BETA, P)
2020 CALL MIC_DAXPY(NDDL, ONE, Z, P)
2028 CALL MIC_MV(NDDL,P,DIAG_K,Y)
2030 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
2032 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
2065 CALL MIC_DDOT(NDDL, P, Y, S)
2086 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
2087 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
2094 CALL MIC_DCOPY(NDDL, R, Z)
2095 ELSEIF(IPREC == 2)THEN
2097 CALL MIC_MV(NDDL,R,DIAG_M,Z)
2098 ELSEIF(IPREC == 5)THEN
2100 CALL MIC_DCOPY(NDDL, R, Y)
2102 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
2104 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
2105 CALL MIC_DCOPY(NDDL, Z, Y)
2107 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
2139 CALL MIC_DDOT(NDDL, R, Z, G1)
2167 CALL MIC_DDOT(NDDL, R, R, R2)
2182 ELSEIF (ITOL==3) THEN
2194 TNORM2=TNORM2+L_A*L_A+OLDB2+L_B2
2195 GAMMA = SQRT( GBAR*GBAR + OLDB2 )
2198 DELTA = CS * DBAR + SN * L_A
2199 GBAR = SN * DBAR - CS * L_A
2203 XNORM2 = XNORM2+ZL*ZL
2204 GMAX = MAX( GMAX, GAMMA )
2205 GMIN = MIN( GMIN, GAMMA )
2206 RHS1 = RHS2 - DELTA * ZL
2208 TOLN=TOLS2*ANORM2*XNORM2
2213 ELSEIF (ITOL==4) THEN
2214 TMP=ALPHA*ALPHA*ABS(G1)
2233.AND.
IF(IPRI/=0ITASK==0)THEN
2234 IF(MOD(IT,IP)==0)THEN
2236 WRITE(IOUT,1001)IT,RR
2237 IF(IPRI<0) WRITE(ISTDO,1001)IT,RR
2262 tt=OMP_GET_WTIME()-tt
2263 i= omp_get_num_threads()
2266 print *,' execution time on mic with
',i,' threads
'
2267 print *,' ==>
',tt,' seconds
'
2272 WRITE(IOUT,1001)IT-1,RR
2273 END IF ! fin NSPMD ==1
2275!dec$ omp offload target(mic:MICID) out(X,R:alloc_if(.false.))
2287 TF=OMP_GET_WTIME()-TF
2288 print *,'transfer time mic=>cpu(s) =
',TF
2289 print *,'transfer rate mic=>cpu(mb/s)=
',
2290 . ((2*NDDL/(1024*1024))*8)/TF
2301 END IF ! fin GPUR4R8==2 (MIC double precision)
2308 IF(ISAVE==1)CALL IMP_RSAVE(NDDL,X,R)
2314#include "lockon.inc"
2316#include "lockoff.inc"
2317 CALL PUPD(N_MAX,NDDLI_G,IT)
2321#include "lockon.inc"
2323#include "lockoff.inc"
2325#include "lockon.inc"
2327#include "lockoff.inc"
2329.NOT.
IF (MIXEDSOL()) THEN
2331 WRITE(IOUT,1003)NLIM
2334 WRITE(ISTDO,1003)NLIM
2338 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2340 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2343 IF (R2>HUNDRED*TOLN) THEN
2353.NOT.
END IF !((MIXEDSOL)) THEN
2355 END IF !(ISOLV==7) THEN
2360 WRITE(IOUT,1002)IT,RR
2361 IF(ITOL==3)WRITE(IOUT,2000)
2362 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2366 WRITE(ISTDO,1002)IT,RR
2367 IF(ITOL==3)WRITE(ISTDO,2000)
2368 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2379 END IF !(ITASK == 0) THEN
2383#include "lockon.inc"
2385#include "lockoff.inc"
2389 1000 FORMAT(5X,'iteration=
',I8,5X,' initial residual
norm=
',E11.4)
2390 1001 FORMAT(5X,'iteration=
',I8,5X,' relative residual
norm=
',E11.4)
2391 1002 FORMAT(3X,'total c.g. iteration=
',I8,5X,
2392 . ' relative residual
norm=
',E11.4)
2394 . '---warning :
the iteration limit number was reached
',I8)
2395 1004 FORMAT(5X,'warning:c.g stop with relative residual
norm=
',E11.4)
2396 2000 FORMAT(/ 5X, A, 2X, 'anorm =
', E12.4, 2X, 'xnorm =
', E12.4,2X,
2398 2002 FORMAT(/ 5X, 'with
', 2X, 'alfa =
', E12.4, 2X, 'beta =
',
2399 . E12.4,2X,'oldb =
', E12.4)