diff --git a/AMSS_NCKU_source/Parallel.C b/AMSS_NCKU_source/Parallel.C index b87ce6d..38278ab 100644 --- a/AMSS_NCKU_source/Parallel.C +++ b/AMSS_NCKU_source/Parallel.C @@ -5253,10 +5253,10 @@ void Parallel::PeriodicBD(Patch *Pat, MyList *VarList, int Symmetry) delete[] transfer_src; delete[] transfer_dst; } -double Parallel::L2Norm(Patch *Pat, var *vf) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); +double Parallel::L2Norm(Patch *Pat, var *vf) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); double tvf, dtvf = 0; int BDW = ghost_width; @@ -5281,13 +5281,48 @@ double Parallel::L2Norm(Patch *Pat, var *vf) MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); tvf = sqrt(tvf); - - return tvf; -} -double Parallel::L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + return tvf; +} +void Parallel::L2Norm7(Patch *Pat, var **vf, double *norms) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + double tvf[7], dtvf[7]; + int BDW = ghost_width; + for (int i = 0; i < 7; i++) + dtvf[i] = 0; + + MyList *BP = Pat->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pat->bbox[0], Pat->bbox[1], Pat->bbox[2], + Pat->bbox[3], Pat->bbox[4], Pat->bbox[5], + cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn], + cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn], + cg->fgfs[vf[6]->sgfn], tvf, BDW); + for (int i = 0; i < 7; i++) + dtvf[i] += tvf[i]; + } + if (BP == Pat->ble) + break; + BP = BP->next; + } + + MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + for (int i = 0; i < 7; i++) + norms[i] = sqrt(tvf[i]); +} +double Parallel::L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); double tvf, dtvf = 0; int BDW = ghost_width; @@ -5312,12 +5347,47 @@ double Parallel::L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here) MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, Comm_here); tvf = sqrt(tvf); - - return tvf; -} -void Parallel::checkgsl(MyList *pp, bool first_only) -{ - int myrank = 0; + + return tvf; +} +void Parallel::L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + double tvf[7], dtvf[7]; + int BDW = ghost_width; + for (int i = 0; i < 7; i++) + dtvf[i] = 0; + + MyList *BP = Pat->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pat->bbox[0], Pat->bbox[1], Pat->bbox[2], + Pat->bbox[3], Pat->bbox[4], Pat->bbox[5], + cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn], + cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn], + cg->fgfs[vf[6]->sgfn], tvf, BDW); + for (int i = 0; i < 7; i++) + dtvf[i] += tvf[i]; + } + if (BP == Pat->ble) + break; + BP = BP->next; + } + + MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, Comm_here); + + for (int i = 0; i < 7; i++) + norms[i] = sqrt(tvf[i]); +} +void Parallel::checkgsl(MyList *pp, bool first_only) +{ + int myrank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); if (myrank == 0) { diff --git a/AMSS_NCKU_source/Parallel.h b/AMSS_NCKU_source/Parallel.h index 0ab975c..d2b268f 100644 --- a/AMSS_NCKU_source/Parallel.h +++ b/AMSS_NCKU_source/Parallel.h @@ -179,12 +179,13 @@ namespace Parallel MyList *clone_gsl(MyList *p, bool first_only); MyList *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue MyList *build_bulk_gsl(Block *bp, Patch *Pat); - void build_PhysBD_gstl(Patch *Pat, MyList *srci, MyList *dsti, - MyList **out_src, MyList **out_dst); - void PeriodicBD(Patch *Pat, MyList *VarList, int Symmetry); - double L2Norm(Patch *Pat, var *vf); - void checkgsl(MyList *pp, bool first_only); - void checkvarl(MyList *pp, bool first_only); + void build_PhysBD_gstl(Patch *Pat, MyList *srci, MyList *dsti, + MyList **out_src, MyList **out_dst); + void PeriodicBD(Patch *Pat, MyList *VarList, int Symmetry); + double L2Norm(Patch *Pat, var *vf); + void L2Norm7(Patch *Pat, var **vf, double *norms); + void checkgsl(MyList *pp, bool first_only); + void checkvarl(MyList *pp, bool first_only); MyList *divide_gsl(MyList *p, Patch *Pat); MyList *divide_gs(MyList *p, Patch *Pat); void prepare_inter_time_level(Patch *Pat, @@ -216,11 +217,12 @@ namespace Parallel void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape); bool point_locat_gsl(double *pox, MyList *gsl); void checkpatchlist(MyList *PatL, bool buflog); - - double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here); - bool PatList_Interp_Points(MyList *PatL, MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetry, MPI_Comm Comm_here); + + double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here); + void L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here); + bool PatList_Interp_Points(MyList *PatL, MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry, MPI_Comm Comm_here); #if (PSTR == 1 || PSTR == 2 || PSTR == 3) MyList *distribute(MyList *PatchLIST, int cpusize, int ingfsi, int fngfsi, bool periodic, int start_rank, int end_rank, int nodes = 0); diff --git a/AMSS_NCKU_source/ShellPatch.C b/AMSS_NCKU_source/ShellPatch.C index 50ad334..2f092d5 100644 --- a/AMSS_NCKU_source/ShellPatch.C +++ b/AMSS_NCKU_source/ShellPatch.C @@ -3439,10 +3439,10 @@ void ShellPatch::write_Pablo_file_ss(int *ext, double xmin, double xmax, double delete[] Z; } -double ShellPatch::L2Norm(var *vf) -{ - double tvf, dtvf = 0; - int BDW = overghost; +double ShellPatch::L2Norm(var *vf) +{ + double tvf, dtvf = 0; + int BDW = overghost; MyList *sPp = PatL; while (sPp) @@ -3469,13 +3469,50 @@ double ShellPatch::L2Norm(var *vf) MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); tvf = sqrt(tvf); - - return tvf; -} - -// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs -void ShellPatch::Find_Maximum(MyList *VarList, double *XX, - double *Shellf) + + return tvf; +} +void ShellPatch::L2Norm7(var **vf, double *norms) +{ + double tvf[7], dtvf[7]; + int BDW = overghost; + for (int i = 0; i < 7; i++) + dtvf[i] = 0; + + MyList *sPp = PatL; + while (sPp) + { + MyList *Bp = sPp->data->blb; + while (Bp) + { + Block *cg = Bp->data; + if (myrank == cg->rank) + { + f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2], + sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], + sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], + cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn], + cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn], + cg->fgfs[vf[6]->sgfn], tvf, BDW); + for (int i = 0; i < 7; i++) + dtvf[i] += tvf[i]; + } + if (Bp == sPp->data->ble) + break; + Bp = Bp->next; + } + sPp = sPp->next; + } + + MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + for (int i = 0; i < 7; i++) + norms[i] = sqrt(tvf[i]); +} + +// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs +void ShellPatch::Find_Maximum(MyList *VarList, double *XX, + double *Shellf) { MyList *varl; int num_var = 0; diff --git a/AMSS_NCKU_source/ShellPatch.h b/AMSS_NCKU_source/ShellPatch.h index b64c79d..fa93ae1 100644 --- a/AMSS_NCKU_source/ShellPatch.h +++ b/AMSS_NCKU_source/ShellPatch.h @@ -195,10 +195,11 @@ public: bool Interp_One_Point(MyList *VarList, double *XX, /*input global Cartesian coordinate*/ double *Shellf, int Symmetry); - void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, - char *filename, int sst); - double L2Norm(var *vf); - void Find_Maximum(MyList *VarList, double *XX, double *Shellf); -}; + void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, + char *filename, int sst); + double L2Norm(var *vf); + void L2Norm7(var **vf, double *norms); + void Find_Maximum(MyList *VarList, double *XX, double *Shellf); +}; #endif /* SHELLPATCH_H */ diff --git a/AMSS_NCKU_source/bssn_class.C b/AMSS_NCKU_source/bssn_class.C index 6a12b14..3dd57fd 100644 --- a/AMSS_NCKU_source/bssn_class.C +++ b/AMSS_NCKU_source/bssn_class.C @@ -7422,41 +7422,24 @@ void bssn_class::Constraint_Out() #endif double ConV[7]; -#if (PSTR == 1 || PSTR == 2) - double ConV_h[7]; -#endif - -#ifdef WithShell - ConV[0] = SH->L2Norm(Cons_Ham); - ConV[1] = SH->L2Norm(Cons_Px); - ConV[2] = SH->L2Norm(Cons_Py); - ConV[3] = SH->L2Norm(Cons_Pz); - ConV[4] = SH->L2Norm(Cons_Gx); - ConV[5] = SH->L2Norm(Cons_Gy); - ConV[6] = SH->L2Norm(Cons_Gz); - ConVMonitor->writefile(PhysTime, 7, ConV); -#endif - for (int levi = 0; levi < GH->levels; levi++) - { -#if (PSTR == 0) - ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham); - ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px); - ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py); - ConV[3] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz); - ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx); - ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy); - ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz); -#elif (PSTR == 1 || PSTR == 2) - ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham, GH->Commlev[levi]); - ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px, GH->Commlev[levi]); - ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py, GH->Commlev[levi]); - ConV[3] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz, GH->Commlev[levi]); - ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx, GH->Commlev[levi]); - ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy, GH->Commlev[levi]); - ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz, GH->Commlev[levi]); - // misc::tillherecheck("before collect data to cpu0"); - // MPI_ALLREDUCE( sendbuf, recvbuf, count, datatype, op, comm), sendbu and recvbuf must be different - if (levi > 0) +#if (PSTR == 1 || PSTR == 2) + double ConV_h[7]; +#endif + var *ConstraintVars[7] = {Cons_Ham, Cons_Px, Cons_Py, Cons_Pz, Cons_Gx, Cons_Gy, Cons_Gz}; + +#ifdef WithShell + SH->L2Norm7(ConstraintVars, ConV); + ConVMonitor->writefile(PhysTime, 7, ConV); +#endif + for (int levi = 0; levi < GH->levels; levi++) + { +#if (PSTR == 0) + Parallel::L2Norm7(GH->PatL[levi]->data, ConstraintVars, ConV); +#elif (PSTR == 1 || PSTR == 2) + Parallel::L2Norm7(GH->PatL[levi]->data, ConstraintVars, ConV, GH->Commlev[levi]); + // misc::tillherecheck("before collect data to cpu0"); + // MPI_ALLREDUCE( sendbuf, recvbuf, count, datatype, op, comm), sendbu and recvbuf must be different + if (levi > 0) { if (GH->mylev == levi && myrank == GH->start_rank[levi]) for (int i = 0; i < 7; i++) diff --git a/AMSS_NCKU_source/fmisc.f90 b/AMSS_NCKU_source/fmisc.f90 index 6f5be07..e55f3de 100644 --- a/AMSS_NCKU_source/fmisc.f90 +++ b/AMSS_NCKU_source/fmisc.f90 @@ -1511,13 +1511,88 @@ deallocate(f_flat) f_out = f_out*dX*dY*dZ - return - - end subroutine l2normhelper -!-------------------------------------------------------------------------------------- -! calculate L2norm especially for shell Blocks - subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,& - f,f_out,gw,ogw,Symmetry) + return + + end subroutine l2normhelper +!-------------------------------------------------------------------------------------- + subroutine l2normhelper7(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,& + f1,f2,f3,f4,f5,f6,f7,f_out,gw) + + implicit none +!~~~~~~> Input parameters: + integer,intent(in ):: ex(1:3) + real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3)),xmin,ymin,zmin,xmax,ymax,zmax + integer,intent(in)::gw + real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: f1,f2,f3,f4,f5,f6,f7 + real*8, intent(out) :: f_out(7) +!~~~~~~> Other variables: + + real*8 :: dX, dY, dZ + integer::imin,jmin,kmin + integer::imax,jmax,kmax + integer::i,j,k + real*8 :: s1,s2,s3,s4,s5,s6,s7 + + dX = X(2) - X(1) + dY = Y(2) - Y(1) + dZ = Z(2) - Z(1) + +! for ghost zone + imin = gw+1 + jmin = gw+1 + kmin = gw+1 + + imax = ex(1) - gw + jmax = ex(2) - gw + kmax = ex(3) - gw + +!for patch boundary (i.e., not ghost boundary) + +if(dabs(X(ex(1))-xmax) < dX) imax = ex(1) +if(dabs(Y(ex(2))-ymax) < dY) jmax = ex(2) +if(dabs(Z(ex(3))-zmax) < dZ) kmax = ex(3) +if(dabs(X(1)-xmin) < dX) imin = 1 +if(dabs(Y(1)-ymin) < dY) jmin = 1 +if(dabs(Z(1)-zmin) < dZ) kmin = 1 + + s1 = 0.d0 + s2 = 0.d0 + s3 = 0.d0 + s4 = 0.d0 + s5 = 0.d0 + s6 = 0.d0 + s7 = 0.d0 + + do k=kmin,kmax + do j=jmin,jmax +!DIR$ SIMD REDUCTION(+:s1,s2,s3,s4,s5,s6,s7) + do i=imin,imax + s1 = s1 + f1(i,j,k)*f1(i,j,k) + s2 = s2 + f2(i,j,k)*f2(i,j,k) + s3 = s3 + f3(i,j,k)*f3(i,j,k) + s4 = s4 + f4(i,j,k)*f4(i,j,k) + s5 = s5 + f5(i,j,k)*f5(i,j,k) + s6 = s6 + f6(i,j,k)*f6(i,j,k) + s7 = s7 + f7(i,j,k)*f7(i,j,k) + enddo + enddo + enddo + + f_out(1) = s1*dX*dY*dZ + f_out(2) = s2*dX*dY*dZ + f_out(3) = s3*dX*dY*dZ + f_out(4) = s4*dX*dY*dZ + f_out(5) = s5*dX*dY*dZ + f_out(6) = s6*dX*dY*dZ + f_out(7) = s7*dX*dY*dZ + + return + + end subroutine l2normhelper7 +!-------------------------------------------------------------------------------------- +! calculate L2norm especially for shell Blocks + subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,& + f,f_out,gw,ogw,Symmetry) implicit none !~~~~~~> Input parameters: diff --git a/AMSS_NCKU_source/fmisc.h b/AMSS_NCKU_source/fmisc.h index 1a0b6d6..5af428e 100644 --- a/AMSS_NCKU_source/fmisc.h +++ b/AMSS_NCKU_source/fmisc.h @@ -12,9 +12,10 @@ #define f_global_interpind global_interpind #define f_global_interpind2d global_interpind2d #define f_global_interpind1d global_interpind1d -#define f_l2normhelper l2normhelper -#define f_l2normhelper_sh l2normhelper_sh -#define f_l2normhelper_sh_rms l2normhelper_sh_rms +#define f_l2normhelper l2normhelper +#define f_l2normhelper7 l2normhelper7 +#define f_l2normhelper_sh l2normhelper_sh +#define f_l2normhelper_sh_rms l2normhelper_sh_rms #define f_average average #define f_average3 average3 #define f_average2 average2 @@ -41,9 +42,10 @@ #define f_global_interpind GLOBAL_INTERPIND #define f_global_interpind2d GLOBAL_INTERPIND2D #define f_global_interpind1d GLOBAL_INTERPIND1D -#define f_l2normhelper L2NORMHELPER -#define f_l2normhelper_sh L2NORMHELPER_SH -#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS +#define f_l2normhelper L2NORMHELPER +#define f_l2normhelper7 L2NORMHELPER7 +#define f_l2normhelper_sh L2NORMHELPER_SH +#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS #define f_average AVERAGE #define f_average3 AVERAGE3 #define f_average2 AVERAGE2 @@ -70,9 +72,10 @@ #define f_global_interpind global_interpind_ #define f_global_interpind2d global_interpind2d_ #define f_global_interpind1d global_interpind1d_ -#define f_l2normhelper l2normhelper_ -#define f_l2normhelper_sh l2normhelper_sh_ -#define f_l2normhelper_sh_rms l2normhelper_sh_rms_ +#define f_l2normhelper l2normhelper_ +#define f_l2normhelper7 l2normhelper7_ +#define f_l2normhelper_sh l2normhelper_sh_ +#define f_l2normhelper_sh_rms l2normhelper_sh_rms_ #define f_average average_ #define f_average3 average3_ #define f_average2 average2_ @@ -156,20 +159,29 @@ extern "C" int *, double *, int &, int &); } -extern "C" -{ - void f_l2normhelper(int *, double *, double *, double *, - double &, double &, double &, - double &, double &, double &, - double *, double &, int &); -} - -extern "C" -{ - void f_l2normhelper_sh(int *, double *, double *, double *, - double &, double &, double &, - double &, double &, double &, - double *, double &, int &, int &, int &); +extern "C" +{ + void f_l2normhelper(int *, double *, double *, double *, + double &, double &, double &, + double &, double &, double &, + double *, double &, int &); +} + +extern "C" +{ + void f_l2normhelper7(int *, double *, double *, double *, + double &, double &, double &, + double &, double &, double &, + double *, double *, double *, double *, + double *, double *, double *, double *, int &); +} + +extern "C" +{ + void f_l2normhelper_sh(int *, double *, double *, double *, + double &, double &, double &, + double &, double &, double &, + double *, double &, int &, int &, int &); } extern "C"