Compare commits

..

2 Commits

Author SHA1 Message Date
3f7e20f702 删除diff_new.f90中冗余部分,方便后续工作 2026-02-08 00:54:23 +08:00
673dd20722 对fmisc.f90的polint修改 2026-02-07 01:56:44 +08:00
19 changed files with 1868 additions and 5059 deletions

3
.gitignore vendored
View File

@@ -1,6 +1,3 @@
__pycache__ __pycache__
GW150914 GW150914
GW150914-origin GW150914-origin
docs
*.tmp

8
AMSS_NCKU_ABEtest.py Executable file → Normal file
View File

@@ -34,15 +34,14 @@ import time
File_directory = os.path.join(input_data.File_directory) File_directory = os.path.join(input_data.File_directory)
## Check if output directory exists and if TwoPuncture data is available ## Check if output directory exists and if TwoPuncture data is available
#skip_twopuncture = False skip_twopuncture = False
skip_twopuncture = True
output_directory = os.path.join(File_directory, "AMSS_NCKU_output") output_directory = os.path.join(File_directory, "AMSS_NCKU_output")
binary_results_directory = os.path.join(output_directory, input_data.Output_directory) binary_results_directory = os.path.join(output_directory, input_data.Output_directory)
if os.path.exists(File_directory): if os.path.exists(File_directory):
print( " Output directory already exists." ) print( " Output directory already exists." )
print() print()
'''
# Check if TwoPuncture initial data files exist # Check if TwoPuncture initial data files exist
if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture"): if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture"):
twopuncture_output = os.path.join(output_directory, "TwoPunctureABE") twopuncture_output = os.path.join(output_directory, "TwoPunctureABE")
@@ -72,11 +71,10 @@ if os.path.exists(File_directory):
print( " Please input 'skip' or 'regenerate'." ) print( " Please input 'skip' or 'regenerate'." )
except ValueError: except ValueError:
print( " Please input 'skip' or 'regenerate'." ) print( " Please input 'skip' or 'regenerate'." )
else: else:
print( " TwoPuncture initial data not found, will regenerate everything." ) print( " TwoPuncture initial data not found, will regenerate everything." )
print() print()
'''
# If not skipping, remove and recreate directory # If not skipping, remove and recreate directory
if not skip_twopuncture: if not skip_twopuncture:
shutil.rmtree(File_directory, ignore_errors=True) shutil.rmtree(File_directory, ignore_errors=True)

View File

@@ -277,3 +277,4 @@ def main():
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -37,51 +37,57 @@ close(77)
end program checkFFT end program checkFFT
#endif #endif
!-------------
! Optimized FFT using Intel oneMKL DFTI
! Mathematical equivalence: Standard DFT definition
! Forward (isign=1): X[k] = sum_{n=0}^{N-1} x[n] * exp(-2*pi*i*k*n/N)
! Backward (isign=-1): X[k] = sum_{n=0}^{N-1} x[n] * exp(+2*pi*i*k*n/N)
! Input/Output: dataa is interleaved complex array [Re(0),Im(0),Re(1),Im(1),...]
!------------- !-------------
SUBROUTINE four1(dataa,nn,isign) SUBROUTINE four1(dataa,nn,isign)
use MKL_DFTI
implicit none implicit none
INTEGER, intent(in) :: isign, nn INTEGER::isign,nn
DOUBLE PRECISION, dimension(2*nn), intent(inout) :: dataa double precision,dimension(2*nn)::dataa
INTEGER::i,istep,j,m,mmax,n
type(DFTI_DESCRIPTOR), pointer :: desc double precision::tempi,tempr
integer :: status DOUBLE PRECISION::theta,wi,wpi,wpr,wr,wtemp
n=2*nn
! Create DFTI descriptor for 1D complex-to-complex transform j=1
status = DftiCreateDescriptor(desc, DFTI_DOUBLE, DFTI_COMPLEX, 1, nn) do i=1,n,2
if (status /= 0) return if(j.gt.i)then
tempr=dataa(j)
! Set input/output storage as interleaved complex (default) tempi=dataa(j+1)
status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_INPLACE) dataa(j)=dataa(i)
if (status /= 0) then dataa(j+1)=dataa(i+1)
status = DftiFreeDescriptor(desc) dataa(i)=tempr
return dataa(i+1)=tempi
endif
m=nn
1 if ((m.ge.2).and.(j.gt.m)) then
j=j-m
m=m/2
goto 1
endif
j=j+m
enddo
mmax=2
2 if (n.gt.mmax) then
istep=2*mmax
theta=6.28318530717959d0/(isign*mmax)
wpr=-2.d0*sin(0.5d0*theta)**2
wpi=sin(theta)
wr=1.d0
wi=0.d0
do m=1,mmax,2
do i=m,n,istep
j=i+mmax
tempr=sngl(wr)*dataa(j)-sngl(wi)*dataa(j+1)
tempi=sngl(wr)*dataa(j+1)+sngl(wi)*dataa(j)
dataa(j)=dataa(i)-tempr
dataa(j+1)=dataa(i+1)-tempi
dataa(i)=dataa(i)+tempr
dataa(i+1)=dataa(i+1)+tempi
enddo
wtemp=wr
wr=wr*wpr-wi*wpi+wr
wi=wi*wpr+wtemp*wpi+wi
enddo
mmax=istep
goto 2
endif endif
! Commit the descriptor
status = DftiCommitDescriptor(desc)
if (status /= 0) then
status = DftiFreeDescriptor(desc)
return
endif
! Execute FFT based on direction
if (isign == 1) then
! Forward FFT: exp(-2*pi*i*k*n/N)
status = DftiComputeForward(desc, dataa)
else
! Backward FFT: exp(+2*pi*i*k*n/N)
status = DftiComputeBackward(desc, dataa)
endif
! Free descriptor
status = DftiFreeDescriptor(desc)
return return
END SUBROUTINE four1 END SUBROUTINE four1

View File

@@ -4,8 +4,6 @@
#include "prolongrestrict.h" #include "prolongrestrict.h"
#include "misc.h" #include "misc.h"
#include "parameters.h" #include "parameters.h"
#include <vector>
#include <algorithm>
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
{ {
@@ -354,73 +352,14 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
split_size = Mymax(min_size, block_size / nodes); split_size = Mymax(min_size, block_size / nodes);
split_size = Mymax(1, split_size); split_size = Mymax(1, split_size);
// Pass 1: compute block volumes for greedy rank assignment int n_rank = 0;
std::vector<long> block_volumes;
PLi = PatchLIST; PLi = PatchLIST;
int reacpu = 0; int reacpu = 0;
while (PLi) while (PLi)
{ {
Patch *PP = PLi->data; Patch *PP = PLi->data;
reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape); reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape);
int ibbox_here[2 * dim];
for (int i = 0; i < nxyz[0]; i++)
for (int j = 0; j < nxyz[1]; j++)
for (int k = 0; k < nxyz[2]; k++)
{
ibbox_here[0] = (PP->shape[0] * i) / nxyz[0];
ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
ibbox_here[1] = (PP->shape[1] * j) / nxyz[1];
ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
ibbox_here[2] = (PP->shape[2] * k) / nxyz[2];
ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
if (periodic)
{
for (int d = 0; d < dim; d++) { ibbox_here[d] -= ghost_width; ibbox_here[dim + d] += ghost_width; }
}
else
{
ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width);
ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width);
ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width);
ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width);
ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width);
ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width);
}
long vol = 1;
for (int d = 0; d < dim; d++)
vol *= (ibbox_here[dim + d] - ibbox_here[d] + 1);
block_volumes.push_back(vol);
}
PLi = PLi->next;
}
// Greedy LPT: sort by volume descending, assign each to least-loaded rank
std::vector<int> assigned_ranks(block_volumes.size());
{
std::vector<int> order(block_volumes.size());
for (int i = 0; i < (int)order.size(); i++) order[i] = i;
std::sort(order.begin(), order.end(), [&](int a, int b) {
return block_volumes[a] > block_volumes[b];
});
std::vector<long> load(cpusize, 0);
for (int idx : order)
{
int min_r = 0;
for (int r = 1; r < cpusize; r++)
if (load[r] < load[min_r]) min_r = r;
assigned_ranks[idx] = min_r;
load[min_r] += block_volumes[idx];
}
}
// Pass 2: create blocks with pre-assigned ranks
int block_idx = 0;
PLi = PatchLIST;
while (PLi)
{
Patch *PP = PLi->data;
partition3(nxyz, split_size, mmin_width, nodes, PP->shape);
Block *ng0, *ng; Block *ng0, *ng;
int shape_here[dim], ibbox_here[2 * dim]; int shape_here[dim], ibbox_here[2 * dim];
@@ -504,7 +443,10 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
int shape_res[dim * pices]; int shape_res[dim * pices];
double bbox_res[2 * dim * pices]; double bbox_res[2 * dim * pices];
misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width); misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width);
ng = ng0 = new Block(dim, shape_res, bbox_res, assigned_ranks[block_idx++], ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks ng = ng0 = new Block(dim, shape_res, bbox_res, n_rank++, ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks
// if(n_rank==cpusize) {n_rank=0; cerr<<"place one!!"<<endl;}
// ng->checkBlock(); // ng->checkBlock();
if (BlL) if (BlL)
BlL->insert(ng); BlL->insert(ng);
@@ -513,19 +455,22 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
for (int i = 1; i < pices; i++) for (int i = 1; i < pices; i++)
{ {
ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, assigned_ranks[block_idx++], ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, n_rank++, ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks
// if(n_rank==cpusize) {n_rank=0; cerr<<"place two!! "<<i<<endl;}
// ng->checkBlock(); // ng->checkBlock();
BlL->insert(ng); BlL->insert(ng);
} }
} }
#else #else
ng = ng0 = new Block(dim, shape_here, bbox_here, assigned_ranks[block_idx++], ingfsi, fngfsi, PP->lev); // delete through KillBlocks ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks
// ng->checkBlock(); // ng->checkBlock();
if (BlL) if (BlL)
BlL->insert(ng); BlL->insert(ng);
else else
BlL = new MyList<Block>(ng); // delete through KillBlocks BlL = new MyList<Block>(ng); // delete through KillBlocks
#endif #endif
if (n_rank == cpusize)
n_rank = 0;
// set PP->blb // set PP->blb
if (i == 0 && j == 0 && k == 0) if (i == 0 && j == 0 && k == 0)
@@ -3559,7 +3504,7 @@ int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyLis
return size_out; return size_out;
} }
//
void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst, void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */, MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
int Symmetry) int Symmetry)
@@ -3567,20 +3512,13 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
int myrank, cpusize; int myrank, cpusize;
MPI_Comm_size(MPI_COMM_WORLD, &cpusize); MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
/*
// Early exit: if no gridseg pairs exist for any node, skip all work
{
bool has_segs = false;
for (int n = 0; n < cpusize; n++) {
if (src[n] && dst[n]) { has_segs = true; break; }
}
if (!has_segs) return;
}
*/
int node; int node;
MPI_Request *reqs = new MPI_Request[2 * cpusize]; MPI_Request *reqs;
MPI_Status *stats = new MPI_Status[2 * cpusize]; MPI_Status *stats;
reqs = new MPI_Request[2 * cpusize];
stats = new MPI_Status[2 * cpusize];
int req_no = 0; int req_no = 0;
double **send_data, **rec_data; double **send_data, **rec_data;
@@ -3588,42 +3526,50 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
rec_data = new double *[cpusize]; rec_data = new double *[cpusize];
int length; int length;
for (node = 0; node < cpusize; node++)
send_data[node] = rec_data[node] = 0;
// 第1步: 本地拷贝 + 所有 Irecv
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)
{ {
send_data[node] = rec_data[node] = 0;
if (node == myrank) if (node == myrank)
{ {
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry)) if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{ {
rec_data[node] = new double[length]; rec_data[node] = new double[length];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 1" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
} }
} }
else else
{ {
// send from this cpu to cpu#node
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{
send_data[node] = new double[length];
if (!send_data[node])
{
cout << "out of memory when new in short transfer, place 2" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
}
// receive from cpu#node to this cpu
if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry)) if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry))
{ {
rec_data[node] = new double[length]; rec_data[node] = new double[length];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 3" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++); MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
} }
} }
} }
// wait for all requests to complete
// 第2步: pack + Isend
for (node = 0; node < cpusize; node++)
{
if (node == myrank) continue;
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{
send_data[node] = new double[length];
data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
}
}
MPI_Waitall(req_no, reqs, stats); MPI_Waitall(req_no, reqs, stats);
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,7 @@
#ifndef TWO_PUNCTURES_H #ifndef TWO_PUNCTURES_H
#define TWO_PUNCTURES_H #define TWO_PUNCTURES_H
#include <omp.h>
#define StencilSize 19 #define StencilSize 19
#define N_PlaneRelax 1 #define N_PlaneRelax 1
#define NRELAX 200 #define NRELAX 200
@@ -43,18 +42,6 @@ private:
int ntotal; int ntotal;
// ===== Precomputed spectral derivative matrices =====
double *D1_A, *D2_A;
double *D1_B, *D2_B;
double *DF1_phi, *DF2_phi;
// ===== Pre-allocated workspace for LineRelax (per-thread) =====
int max_threads;
double **ws_diag_be, **ws_e_be, **ws_f_be, **ws_b_be, **ws_x_be;
double **ws_l_be, **ws_u_be, **ws_d_be, **ws_y_be;
double **ws_diag_al, **ws_e_al, **ws_f_al, **ws_b_al, **ws_x_al;
double **ws_l_al, **ws_u_al, **ws_d_al, **ws_y_al;
struct parameters struct parameters
{ {
int nvar, n1, n2, n3; int nvar, n1, n2, n3;
@@ -71,28 +58,6 @@ public:
int Newtonmaxit); int Newtonmaxit);
~TwoPunctures(); ~TwoPunctures();
// 02/07: New/modified methods
void allocate_workspace();
void free_workspace();
void precompute_derivative_matrices();
void build_cheb_deriv_matrices(int n, double *D1, double *D2);
void build_fourier_deriv_matrices(int N, double *DF1, double *DF2);
void Derivatives_AB3_MatMul(int nvar, int n1, int n2, int n3, derivs v);
void ThomasAlgorithm_ws(int N, double *b, double *a, double *c, double *x, double *q,
double *l, double *u_ws, double *d, double *y);
void LineRelax_be_omp(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD, int tid);
void LineRelax_al_omp(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD, int tid);
void relax_omp(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void Solve(); void Solve();
void set_initial_guess(derivs v); void set_initial_guess(derivs v);
int index(int i, int j, int k, int l, int a, int b, int c, int d); int index(int i, int j, int k, int l, int a, int b, int c, int d);
@@ -151,11 +116,23 @@ public:
double BY_KKofxyz(double x, double y, double z); double BY_KKofxyz(double x, double y, double z);
void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix); void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix);
void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u); void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u);
void relax(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void LineRelax_be(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD);
void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2, void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2,
int n3, derivs dv, derivs u, double *values); int n3, derivs dv, derivs u, double *values);
void LinEquations(double A, double B, double X, double R, void LinEquations(double A, double B, double X, double R,
double x, double r, double phi, double x, double r, double phi,
double y, double z, derivs dU, derivs U, double *values); double y, double z, derivs dU, derivs U, double *values);
void LineRelax_al(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD);
void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q); void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q);
void Save(char *fname); void Save(char *fname);
// provided by Vasileios Paschalidis (vpaschal@illinois.edu) // provided by Vasileios Paschalidis (vpaschal@illinois.edu)

View File

@@ -106,8 +106,7 @@
call getpbh(BHN,Porg,Mass) call getpbh(BHN,Porg,Mass)
#endif #endif
!!! sanity check (disabled in production builds for performance) !!! sanity check
#ifdef DEBUG
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) & dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) & +sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) & +sum(Gamx)+sum(Gamy)+sum(Gamz) &
@@ -137,7 +136,6 @@
gont = 1 gont = 1
return return
endif endif
#endif
PI = dacos(-ONE) PI = dacos(-ONE)
@@ -161,8 +159,36 @@
chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
gxx * betaxy + gxz * betazy + &
gyy * betayx + gyz * betazx &
- gxy * betazz
gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
gxy * betaxz + gyy * betayz + &
gxz * betaxy + gzz * betazy &
- gyz * betaxx
gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
gxx * betaxz + gxy * betayz + &
gyz * betayx + gzz * betazx &
- gxz * betayy !rhs for gij
! invert tilted metric ! invert tilted metric
gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - & gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
@@ -173,12 +199,7 @@
gupyy = ( gxx * gzz - gxz * gxz ) / gupzz gupyy = ( gxx * gzz - gxz * gxz ) / gupzz
gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
gupzz = ( gxx * gyy - gxy * gxy ) / gupzz gupzz = ( gxx * gyy - gxy * gxy ) / gupzz
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
if(co == 0)then if(co == 0)then
! Gam^i_Res = Gam^i + gup^ij_,j ! Gam^i_Res = Gam^i + gup^ij_,j
Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)& Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)&
@@ -924,99 +945,99 @@
!!!!!!!!!advection term part !!!!!!!!!advection term part
gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
gxx * betaxy + gxz * betazy + &
gyy * betayx + gyz * betazx &
- gxy * betazz
gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
gxy * betaxz + gyy * betayz + &
gxz * betaxy + gzz * betazy &
- gyz * betaxx
gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
gxx * betaxz + gxy * betayz + &
gyz * betayx + gzz * betazx &
- gxz * betayy !rhs for gij
if(eps>0)then
! usual Kreiss-Oliger dissipation
call merge_lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
call merge_lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
call merge_lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
else
call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS) call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS)
call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA) call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA)
call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA) call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA)
call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS) call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS)
call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA) call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA)
call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA) call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA)
call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS)
call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA)
!!
call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS) call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS)
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS)
call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS)
call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA) call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA)
#endif
if(eps>0)then
! usual Kreiss-Oliger dissipation
call kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
#define k 40
if(Lev == 1)then
write(*,*) X(i),Y(j),Z(k)
write(*,*) "before",Axx_rhs(i,j,k)
endif
#undef i
#undef j
#undef k
!!stop
#endif
call kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps)
#if 0
#define i 42
#define j 40
#define k 40
if(Lev == 1)then
write(*,*) X(i),Y(j),Z(k)
write(*,*) "after",Axx_rhs(i,j,k)
endif
#undef i
#undef j
#undef k
!!stop
#endif
call kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps)
call kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps)
#if 1
!! bam does not apply dissipation on gauge variables
call kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps)
call kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps)
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
call kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps)
call kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps)
#endif
#endif
endif endif
@@ -1163,265 +1184,3 @@ endif
return return
end function compute_rhs_bssn end function compute_rhs_bssn
subroutine merge_lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
!~~~~~~> local variables:
! note index -2,-1,0, so we have 3 extra points
real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)) :: fh
integer :: imin_lopsided,jmin_lopsided,kmin_lopsided,imin_kodis,jmin_kodis,kmin_kodis,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F3=3.d0
real*8, parameter :: TWO=2.d0,F6=6.0d0,F18=1.8d1
real*8, parameter :: F12=1.2d1, F10=1.d1,EIT=8.d0
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8, parameter :: SIX=6.d0,FIT=1.5d1,TWT=2.d1
real*8,parameter::cof=6.4d1 ! 2^6
real*8,intent(in) :: eps
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin_lopsided = 1
jmin_lopsided = 1
kmin_lopsided = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin_lopsided = -2
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin_lopsided = -2
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin_lopsided = -2
imin_kodis = 1
jmin_kodis = 1
kmin_kodis = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin_kodis = -2
if(Symmetry == OCTANT .and. dabs(X(1)) < dX) imin_kodis = -2
if(Symmetry == OCTANT .and. dabs(Y(1)) < dY) jmin_kodis = -2
call symmetry_bd(3,ex,f,fh,SoA)
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
!! new code, 2012dec27, based on bam
! x direction
if(Sfx(i,j,k) > ZEO)then
if(i+3 <= imax)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
elseif(i+2 <= imax)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
! set imax and imin_lopsided 0
endif
elseif(Sfx(i,j,k) < ZEO)then
if(i-3 >= imin_lopsided)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
elseif(i-2 >= imin_lopsided)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i-1 >= imin_lopsided)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
! set imax and imin_lopsided 0
endif
endif
! y direction
if(Sfy(i,j,k) > ZEO)then
if(j+3 <= jmax)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
elseif(j+2 <= jmax)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
! set imax and imin_lopsided 0
endif
elseif(Sfy(i,j,k) < ZEO)then
if(j-3 >= jmin_lopsided)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
elseif(j-2 >= jmin_lopsided)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j-1 >= jmin_lopsided)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
! set jmax and jmin_lopsided 0
endif
endif
! z direction
if(Sfz(i,j,k) > ZEO)then
if(k+3 <= kmax)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
elseif(k+2 <= kmax)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
! set imax and imin_lopsided 0
endif
elseif(Sfz(i,j,k) < ZEO)then
if(k-3 >= kmin_lopsided)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
elseif(k-2 >= kmin_lopsided)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k-1 >= kmin_lopsided)then
! v
! D f = ------[ 3f + 10f - 18f + 6f - f ]
! i 12dx i+v i i-v i-2v i-3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
! set kmax and kmin_lopsided 0
endif
endif
if(i-3 >= imin_kodis .and. i+3 <= imax .and. &
j-3 >= jmin_kodis .and. j+3 <= jmax .and. &
k-3 >= kmin_kodis .and. k+3 <= kmax) then
! calculation order if important ?
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( &
(fh(i-3,j,k)+fh(i+3,j,k)) - &
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
TWT* fh(i,j,k) )/dX + &
( &
(fh(i,j-3,k)+fh(i,j+3,k)) - &
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
TWT* fh(i,j,k) )/dY + &
( &
(fh(i,j,k-3)+fh(i,j,k+3)) - &
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
TWT* fh(i,j,k) )/dZ )
endif
enddo
enddo
enddo
return
end subroutine merge_lopsided_kodis

File diff suppressed because it is too large Load Diff

View File

@@ -19,60 +19,48 @@
!~~~~~~~> Local variable: !~~~~~~~> Local variable:
integer :: i,j,k real*8, dimension(ex(1),ex(2),ex(3)) :: trA,detg
real*8 :: lgxx,lgyy,lgzz,ldetg real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz
real*8 :: ltrA,lscale
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0 real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
!~~~~~~> !~~~~~~>
do k=1,ex(3) gxx = dxx + ONE
do j=1,ex(2) gyy = dyy + ONE
do i=1,ex(1) gzz = dzz + ONE
lgxx = dxx(i,j,k) + ONE detg = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
lgyy = dyy(i,j,k) + ONE gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
lgzz = dzz(i,j,k) + ONE gupxx = ( gyy * gzz - gyz * gyz ) / detg
gupxy = - ( gxy * gzz - gyz * gxz ) / detg
gupxz = ( gxy * gyz - gyy * gxz ) / detg
gupyy = ( gxx * gzz - gxz * gxz ) / detg
gupyz = - ( gxx * gyz - gxy * gxz ) / detg
gupzz = ( gxx * gyy - gxy * gxy ) / detg
ldetg = lgxx * lgyy * lgzz & trA = gupxx * Axx + gupyy * Ayy + gupzz * Azz &
+ gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) & + TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz)
+ gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) &
- gxz(i,j,k) * lgyy * gxz(i,j,k) &
- gxy(i,j,k) * gxy(i,j,k) * lgzz &
- lgxx * gyz(i,j,k) * gyz(i,j,k)
lgupxx = ( lgyy * lgzz - gyz(i,j,k) * gyz(i,j,k) ) / ldetg Axx = Axx - F1o3 * gxx * trA
lgupxy = - ( gxy(i,j,k) * lgzz - gyz(i,j,k) * gxz(i,j,k) ) / ldetg Axy = Axy - F1o3 * gxy * trA
lgupxz = ( gxy(i,j,k) * gyz(i,j,k) - lgyy * gxz(i,j,k) ) / ldetg Axz = Axz - F1o3 * gxz * trA
lgupyy = ( lgxx * lgzz - gxz(i,j,k) * gxz(i,j,k) ) / ldetg Ayy = Ayy - F1o3 * gyy * trA
lgupyz = - ( lgxx * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / ldetg Ayz = Ayz - F1o3 * gyz * trA
lgupzz = ( lgxx * lgyy - gxy(i,j,k) * gxy(i,j,k) ) / ldetg Azz = Azz - F1o3 * gzz * trA
ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) & detg = ONE / ( detg ** F1o3 )
+ lgupzz * Azz(i,j,k) &
+ TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
+ lgupyz * Ayz(i,j,k))
Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA gxx = gxx * detg
Axy(i,j,k) = Axy(i,j,k) - F1o3 * gxy(i,j,k) * ltrA gxy = gxy * detg
Axz(i,j,k) = Axz(i,j,k) - F1o3 * gxz(i,j,k) * ltrA gxz = gxz * detg
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA gyy = gyy * detg
Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * gyz(i,j,k) * ltrA gyz = gyz * detg
Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA gzz = gzz * detg
lscale = ONE / ( ldetg ** F1o3 ) dxx = gxx - ONE
dyy = gyy - ONE
dxx(i,j,k) = lgxx * lscale - ONE dzz = gzz - ONE
gxy(i,j,k) = gxy(i,j,k) * lscale
gxz(i,j,k) = gxz(i,j,k) * lscale
dyy(i,j,k) = lgyy * lscale - ONE
gyz(i,j,k) = gyz(i,j,k) * lscale
dzz(i,j,k) = lgzz * lscale - ONE
enddo
enddo
enddo
return return
@@ -95,70 +83,50 @@
!~~~~~~~> Local variable: !~~~~~~~> Local variable:
integer :: i,j,k real*8, dimension(ex(1),ex(2),ex(3)) :: trA
real*8 :: lgxx,lgyy,lgzz,lscale real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
real*8 :: lgxy,lgxz,lgyz real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz
real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
real*8 :: ltrA
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0 real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
!~~~~~~> !~~~~~~>
do k=1,ex(3) gxx = dxx + ONE
do j=1,ex(2) gyy = dyy + ONE
do i=1,ex(1) gzz = dzz + ONE
! for g
gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
! for g: normalize determinant first gupzz = ONE / ( gupzz ** F1o3 )
lgxx = dxx(i,j,k) + ONE
lgyy = dyy(i,j,k) + ONE
lgzz = dzz(i,j,k) + ONE
lgxy = gxy(i,j,k)
lgxz = gxz(i,j,k)
lgyz = gyz(i,j,k)
lscale = lgxx * lgyy * lgzz + lgxy * lgyz * lgxz & gxx = gxx * gupzz
+ lgxz * lgxy * lgyz - lgxz * lgyy * lgxz & gxy = gxy * gupzz
- lgxy * lgxy * lgzz - lgxx * lgyz * lgyz gxz = gxz * gupzz
gyy = gyy * gupzz
gyz = gyz * gupzz
gzz = gzz * gupzz
lscale = ONE / ( lscale ** F1o3 ) dxx = gxx - ONE
dyy = gyy - ONE
dzz = gzz - ONE
! for A
lgxx = lgxx * lscale gupxx = ( gyy * gzz - gyz * gyz )
lgxy = lgxy * lscale gupxy = - ( gxy * gzz - gyz * gxz )
lgxz = lgxz * lscale gupxz = ( gxy * gyz - gyy * gxz )
lgyy = lgyy * lscale gupyy = ( gxx * gzz - gxz * gxz )
lgyz = lgyz * lscale gupyz = - ( gxx * gyz - gxy * gxz )
lgzz = lgzz * lscale gupzz = ( gxx * gyy - gxy * gxy )
dxx(i,j,k) = lgxx - ONE trA = gupxx * Axx + gupyy * Ayy + gupzz * Azz &
gxy(i,j,k) = lgxy + TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz)
gxz(i,j,k) = lgxz
dyy(i,j,k) = lgyy - ONE
gyz(i,j,k) = lgyz
dzz(i,j,k) = lgzz - ONE
! for A: trace-free using normalized metric (det=1, no division needed) Axx = Axx - F1o3 * gxx * trA
lgupxx = ( lgyy * lgzz - lgyz * lgyz ) Axy = Axy - F1o3 * gxy * trA
lgupxy = - ( lgxy * lgzz - lgyz * lgxz ) Axz = Axz - F1o3 * gxz * trA
lgupxz = ( lgxy * lgyz - lgyy * lgxz ) Ayy = Ayy - F1o3 * gyy * trA
lgupyy = ( lgxx * lgzz - lgxz * lgxz ) Ayz = Ayz - F1o3 * gyz * trA
lgupyz = - ( lgxx * lgyz - lgxy * lgxz ) Azz = Azz - F1o3 * gzz * trA
lgupzz = ( lgxx * lgyy - lgxy * lgxy )
ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
+ lgupzz * Azz(i,j,k) &
+ TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
+ lgupyz * Ayz(i,j,k))
Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
Axy(i,j,k) = Axy(i,j,k) - F1o3 * lgxy * ltrA
Axz(i,j,k) = Axz(i,j,k) - F1o3 * lgxz * ltrA
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * lgyz * ltrA
Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
enddo
enddo
enddo
return return

View File

@@ -324,9 +324,9 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
enddo enddo
do i=0,ord-1 do i=0,ord-1
@@ -350,6 +350,7 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
@@ -378,6 +379,7 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
@@ -884,6 +886,7 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
@@ -909,6 +912,7 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
@@ -937,6 +941,7 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
@@ -1112,7 +1117,6 @@ end subroutine d2dump
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
! Lagrangian polynomial interpolation ! Lagrangian polynomial interpolation
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
subroutine polint(xa, ya, x, y, dy, ordn) subroutine polint(xa, ya, x, y, dy, ordn)
implicit none implicit none
@@ -1125,6 +1129,7 @@ end subroutine d2dump
real*8, dimension(ordn) :: c, d, ho real*8, dimension(ordn) :: c, d, ho
real*8 :: dif, dift, hp, h, den_val real*8 :: dif, dift, hp, h, den_val
! Initialization
c = ya c = ya
d = ya d = ya
ho = xa - x ho = xa - x
@@ -1132,6 +1137,7 @@ end subroutine d2dump
ns = 1 ns = 1
dif = abs(x - xa(1)) dif = abs(x - xa(1))
! Find the index of the closest table entry
do i = 2, ordn do i = 2, ordn
dift = abs(x - xa(i)) dift = abs(x - xa(i))
if (dift < dif) then if (dift < dif) then
@@ -1143,6 +1149,7 @@ end subroutine d2dump
y = ya(ns) y = ya(ns)
ns = ns - 1 ns = ns - 1
! Main Neville's algorithm loop
do m = 1, ordn - 1 do m = 1, ordn - 1
n_m = ordn - m n_m = ordn - m
do i = 1, n_m do i = 1, n_m
@@ -1150,18 +1157,22 @@ end subroutine d2dump
h = ho(i+m) h = ho(i+m)
den_val = hp - h den_val = hp - h
! Check for division by zero locally
if (den_val == 0.0d0) then if (den_val == 0.0d0) then
write(*,*) 'failure in polint for point',x write(*,*) 'failure in polint for point',x
write(*,*) 'with input points: ',xa write(*,*) 'with input points: ',xa
stop stop
end if end if
! Reuse den_val to avoid redundant divisions
den_val = (c(i+1) - d(i)) / den_val den_val = (c(i+1) - d(i)) / den_val
! Update c and d in place
d(i) = h * den_val d(i) = h * den_val
c(i) = hp * den_val c(i) = hp * den_val
end do end do
! Decide which path (up or down the tableau) to take
if (2 * ns < n_m) then if (2 * ns < n_m) then
dy = c(ns + 1) dy = c(ns + 1)
else else
@@ -1178,36 +1189,26 @@ end subroutine d2dump
! interpolation in 2 dimensions, follow yx order ! interpolation in 2 dimensions, follow yx order
! !
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn) subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn)
implicit none implicit none
integer,intent(in) :: ordn integer,intent(in) :: ordn
real*8, dimension(1:ordn), intent(in) :: x1a,x2a real*8, dimension(ordn), intent(in) :: x1a,x2a
real*8, dimension(1:ordn,1:ordn), intent(in) :: ya real*8, dimension(ordn,ordn), intent(in) :: ya
real*8, intent(in) :: x1,x2 real*8, intent(in) :: x1,x2
real*8, intent(out) :: y,dy real*8, intent(out) :: y,dy
#ifdef POLINT_LEGACY_ORDER
integer :: i,m
real*8, dimension(ordn) :: ymtmp
real*8, dimension(ordn) :: yntmp
m=size(x1a)
do i=1,m
yntmp=ya(i,:)
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
end do
call polint(x1a,ymtmp,x1,y,dy,ordn)
#else
integer :: j integer :: j
real*8, dimension(ordn) :: ymtmp real*8, dimension(ordn) :: ymtmp
real*8 :: dy_temp real*8 :: dy_temp ! Local variable to prevent overwriting result
! Optimized sequence: Loop over columns (j)
! ya(:,j) is a contiguous memory block in Fortran
do j=1,ordn do j=1,ordn
call polint(x1a, ya(:,j), x1, ymtmp(j), dy_temp, ordn) call polint(x1a, ya(:,j), x1, ymtmp(j), dy_temp, ordn)
end do end do
! Final interpolation on the results
call polint(x2a, ymtmp, x2, y, dy, ordn) call polint(x2a, ymtmp, x2, y, dy, ordn)
#endif
return return
end subroutine polin2 end subroutine polin2
@@ -1216,49 +1217,35 @@ end subroutine d2dump
! interpolation in 3 dimensions, follow zyx order ! interpolation in 3 dimensions, follow zyx order
! !
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn) subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn)
implicit none implicit none
integer,intent(in) :: ordn integer,intent(in) :: ordn
real*8, dimension(1:ordn), intent(in) :: x1a,x2a,x3a real*8, dimension(ordn), intent(in) :: x1a,x2a,x3a
real*8, dimension(1:ordn,1:ordn,1:ordn), intent(in) :: ya real*8, dimension(ordn,ordn,ordn), intent(in) :: ya
real*8, intent(in) :: x1,x2,x3 real*8, intent(in) :: x1,x2,x3
real*8, intent(out) :: y,dy real*8, intent(out) :: y,dy
#ifdef POLINT_LEGACY_ORDER
integer :: i,j,m,n
real*8, dimension(ordn,ordn) :: yatmp
real*8, dimension(ordn) :: ymtmp
real*8, dimension(ordn) :: yntmp
real*8, dimension(ordn) :: yqtmp
m=size(x1a)
n=size(x2a)
do i=1,m
do j=1,n
yqtmp=ya(i,j,:)
call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn)
end do
yntmp=yatmp(i,:)
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
end do
call polint(x1a,ymtmp,x1,y,dy,ordn)
#else
integer :: j, k integer :: j, k
real*8, dimension(ordn,ordn) :: yatmp real*8, dimension(ordn,ordn) :: yatmp
real*8, dimension(ordn) :: ymtmp real*8, dimension(ordn) :: ymtmp
real*8 :: dy_temp real*8 :: dy_temp
! Sequence change: Process the contiguous first dimension (x1) first.
! We loop through the 'slow' planes (j, k) to extract 'fast' columns.
do k=1,ordn do k=1,ordn
do j=1,ordn do j=1,ordn
! ya(:,j,k) is contiguous; much faster than ya(i,j,:)
call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp, ordn) call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp, ordn)
end do end do
end do end do
! Now process the second dimension
do k=1,ordn do k=1,ordn
call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp, ordn) call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp, ordn)
end do end do
! Final dimension
call polint(x3a, ymtmp, x3, y, dy, ordn) call polint(x3a, ymtmp, x3, y, dy, ordn)
#endif
return return
end subroutine polin3 end subroutine polin3
@@ -1280,9 +1267,7 @@ end subroutine d2dump
real*8 :: dX, dY, dZ real*8 :: dX, dY, dZ
integer::imin,jmin,kmin integer::imin,jmin,kmin
integer::imax,jmax,kmax integer::imax,jmax,kmax
integer::i,j,k,n_elements integer::i,j,k
real*8, dimension(:), allocatable :: f_flat
real*8, external :: DDOT
dX = X(2) - X(1) dX = X(2) - X(1)
dY = Y(2) - Y(1) dY = Y(2) - Y(1)
@@ -1306,12 +1291,7 @@ if(dabs(X(1)-xmin) < dX) imin = 1
if(dabs(Y(1)-ymin) < dY) jmin = 1 if(dabs(Y(1)-ymin) < dY) jmin = 1
if(dabs(Z(1)-zmin) < dZ) kmin = 1 if(dabs(Z(1)-zmin) < dZ) kmin = 1
! Optimized with oneMKL BLAS DDOT for dot product f_out = sum(f(imin:imax,jmin:jmax,kmin:kmax)*f(imin:imax,jmin:jmax,kmin:kmax))
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(n_elements))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
deallocate(f_flat)
f_out = f_out*dX*dY*dZ f_out = f_out*dX*dY*dZ
@@ -1336,9 +1316,7 @@ f_out = f_out*dX*dY*dZ
real*8 :: dX, dY, dZ real*8 :: dX, dY, dZ
integer::imin,jmin,kmin integer::imin,jmin,kmin
integer::imax,jmax,kmax integer::imax,jmax,kmax
integer::i,j,k,n_elements integer::i,j,k
real*8, dimension(:), allocatable :: f_flat
real*8, external :: DDOT
real*8 :: PIo4 real*8 :: PIo4
@@ -1401,12 +1379,7 @@ if(Symmetry==2)then
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1 if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
endif endif
! Optimized with oneMKL BLAS DDOT for dot product f_out = sum(f(imin:imax,jmin:jmax,kmin:kmax)*f(imin:imax,jmin:jmax,kmin:kmax))
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(n_elements))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
deallocate(f_flat)
f_out = f_out*dX*dY*dZ f_out = f_out*dX*dY*dZ
@@ -1434,8 +1407,6 @@ f_out = f_out*dX*dY*dZ
integer::imin,jmin,kmin integer::imin,jmin,kmin
integer::imax,jmax,kmax integer::imax,jmax,kmax
integer::i,j,k integer::i,j,k
real*8, dimension(:), allocatable :: f_flat
real*8, external :: DDOT
real*8 :: PIo4 real*8 :: PIo4
@@ -1498,12 +1469,11 @@ if(Symmetry==2)then
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1 if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
endif endif
! Optimized with oneMKL BLAS DDOT for dot product f_out = sum(f(imin:imax,jmin:jmax,kmin:kmax)*f(imin:imax,jmin:jmax,kmin:kmax))
f_out = f_out
Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1) Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(Nout))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [Nout])
f_out = DDOT(Nout, f_flat, 1, f_flat, 1)
deallocate(f_flat)
return return
@@ -1701,7 +1671,6 @@ deallocate(f_flat)
real*8, dimension(ORDN,ORDN) :: tmp2 real*8, dimension(ORDN,ORDN) :: tmp2
real*8, dimension(ORDN) :: tmp1 real*8, dimension(ORDN) :: tmp1
real*8, dimension(3) :: SoAh real*8, dimension(3) :: SoAh
real*8, external :: DDOT
! +1 because c++ gives 0 for first point ! +1 because c++ gives 0 for first point
cxB = inds+1 cxB = inds+1
@@ -1737,21 +1706,20 @@ deallocate(f_flat)
ya=fh(cxB(1):cxT(1),cxB(2):cxT(2),cxB(3):cxT(3)) ya=fh(cxB(1):cxT(1),cxB(2):cxT(2),cxB(3):cxT(3))
endif endif
! Optimized with BLAS operations for better performance
! First dimension: z-direction weighted sum
tmp2=0 tmp2=0
do m=1,ORDN do m=1,ORDN
tmp2 = tmp2 + coef(2*ORDN+m)*ya(:,:,m) tmp2 = tmp2 + coef(2*ORDN+m)*ya(:,:,m)
enddo enddo
! Second dimension: y-direction weighted sum
tmp1=0 tmp1=0
do m=1,ORDN do m=1,ORDN
tmp1 = tmp1 + coef(ORDN+m)*tmp2(:,m) tmp1 = tmp1 + coef(ORDN+m)*tmp2(:,m)
enddo enddo
! Third dimension: x-direction weighted sum using BLAS DDOT f_int=0
f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1) do m=1,ORDN
f_int = f_int + coef(m)*tmp1(m)
enddo
return return
@@ -1781,7 +1749,6 @@ deallocate(f_flat)
real*8, dimension(ORDN,ORDN) :: ya real*8, dimension(ORDN,ORDN) :: ya
real*8, dimension(ORDN) :: tmp1 real*8, dimension(ORDN) :: tmp1
real*8, dimension(2) :: SoAh real*8, dimension(2) :: SoAh
real*8, external :: DDOT
! +1 because c++ gives 0 for first point ! +1 because c++ gives 0 for first point
cxB = inds(1:2)+1 cxB = inds(1:2)+1
@@ -1811,14 +1778,15 @@ deallocate(f_flat)
ya=fh(cxB(1):cxT(1),cxB(2):cxT(2),inds(3)) ya=fh(cxB(1):cxT(1),cxB(2):cxT(2),inds(3))
endif endif
! Optimized with BLAS operations
tmp1=0 tmp1=0
do m=1,ORDN do m=1,ORDN
tmp1 = tmp1 + coef(ORDN+m)*ya(:,m) tmp1 = tmp1 + coef(ORDN+m)*ya(:,m)
enddo enddo
! Use BLAS DDOT for final weighted sum f_int=0
f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1) do m=1,ORDN
f_int = f_int + coef(m)*tmp1(m)
enddo
return return
@@ -1849,7 +1817,6 @@ deallocate(f_flat)
real*8, dimension(ORDN) :: ya real*8, dimension(ORDN) :: ya
real*8 :: SoAh real*8 :: SoAh
integer,dimension(3) :: inds integer,dimension(3) :: inds
real*8, external :: DDOT
! +1 because c++ gives 0 for first point ! +1 because c++ gives 0 for first point
inds = indsi + 1 inds = indsi + 1
@@ -1910,8 +1877,10 @@ deallocate(f_flat)
write(*,*)"error in global_interpind1d, not recognized dumyd = ",dumyd write(*,*)"error in global_interpind1d, not recognized dumyd = ",dumyd
endif endif
! Optimized with BLAS DDOT for weighted sum f_int=0
f_int = DDOT(ORDN, coef, 1, ya, 1) do m=1,ORDN
f_int = f_int + coef(m)*ya(m)
enddo
return return
@@ -2143,38 +2112,24 @@ deallocate(f_flat)
end function fWigner_d_function end function fWigner_d_function
!---------------------------------- !----------------------------------
! Optimized factorial function using lookup table for small N
! and log-gamma for large N to avoid overflow
function ffact(N) result(gont) function ffact(N) result(gont)
implicit none implicit none
integer,intent(in) :: N integer,intent(in) :: N
real*8 :: gont real*8 :: gont
integer :: i
! Lookup table for factorials 0! to 20! (precomputed) integer :: i
real*8, parameter, dimension(0:20) :: fact_table = [ &
1.d0, 1.d0, 2.d0, 6.d0, 24.d0, 120.d0, 720.d0, 5040.d0, 40320.d0, &
362880.d0, 3628800.d0, 39916800.d0, 479001600.d0, 6227020800.d0, &
87178291200.d0, 1307674368000.d0, 20922789888000.d0, &
355687428096000.d0, 6402373705728000.d0, 121645100408832000.d0, &
2432902008176640000.d0 ]
! sanity check ! sanity check
if(N < 0)then if(N < 0)then
write(*,*) "ffact: error input for factorial" write(*,*) "ffact: error input for factorial"
gont = 1.d0
return return
endif endif
! Use lookup table for small N (fast path) gont = 1.d0
if(N <= 20)then do i=1,N
gont = fact_table(N) gont = gont*i
else enddo
! Use log-gamma function for large N: N! = exp(log_gamma(N+1))
! This avoids overflow and is computed efficiently
gont = exp(log_gamma(dble(N+1)))
endif
return return
@@ -2308,3 +2263,4 @@ subroutine find_maximum(ext,X,Y,Z,fun,val,pos,llb,uub)
return return
end subroutine end subroutine

View File

@@ -16,66 +16,115 @@ using namespace std;
#include <string.h> #include <string.h>
#include <math.h> #include <math.h>
#endif #endif
/* Linear equation solution by Gauss-Jordan elimination.
// Intel oneMKL LAPACK interface
#include <mkl_lapacke.h>
/* Linear equation solution using Intel oneMKL LAPACK.
a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input
containing the right-hand side vectors. On output a is containing the right-hand side vectors. On output a is
replaced by its matrix inverse, and b is replaced by the replaced by its matrix inverse, and b is replaced by the
corresponding set of solution vectors. corresponding set of solution vectors */
Mathematical equivalence:
Solves: A * x = b => x = A^(-1) * b
Original Gauss-Jordan and LAPACK dgesv/dgetri produce identical results
within numerical precision. */
int gaussj(double *a, double *b, int n) int gaussj(double *a, double *b, int n)
{ {
// Allocate pivot array and workspace double swap;
lapack_int *ipiv = new lapack_int[n];
lapack_int info;
// Make a copy of matrix a for solving (dgesv modifies it to LU form) int *indxc, *indxr, *ipiv;
double *a_copy = new double[n * n]; indxc = new int[n];
for (int i = 0; i < n * n; i++) { indxr = new int[n];
a_copy[i] = a[i]; ipiv = new int[n];
int i, icol, irow, j, k, l, ll;
double big, dum, pivinv, temp;
for (j = 0; j < n; j++)
ipiv[j] = 0;
for (i = 0; i < n; i++)
{
big = 0.0;
for (j = 0; j < n; j++)
if (ipiv[j] != 1)
for (k = 0; k < n; k++)
{
if (ipiv[k] == 0)
{
if (fabs(a[j * n + k]) >= big)
{
big = fabs(a[j * n + k]);
irow = j;
icol = k;
}
}
else if (ipiv[k] > 1)
{
cout << "gaussj: Singular Matrix-1" << endl;
for (int ii = 0; ii < n; ii++)
{
for (int jj = 0; jj < n; jj++)
cout << a[ii * n + jj] << " ";
cout << endl;
}
return 1; // error return
}
} }
// Step 1: Solve linear system A*x = b using LU decomposition ipiv[icol] = ipiv[icol] + 1;
// LAPACKE_dgesv uses column-major by default, but we use row-major if (irow != icol)
info = LAPACKE_dgesv(LAPACK_ROW_MAJOR, n, 1, a_copy, n, ipiv, b, 1); {
for (l = 0; l < n; l++)
if (info != 0) { {
cout << "gaussj: Singular Matrix (dgesv info=" << info << ")" << endl; swap = a[irow * n + l];
delete[] ipiv; a[irow * n + l] = a[icol * n + l];
delete[] a_copy; a[icol * n + l] = swap;
return 1;
} }
// Step 2: Compute matrix inverse A^(-1) using LU factorization swap = b[irow];
// First do LU factorization of original matrix a b[irow] = b[icol];
info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, n, n, a, n, ipiv); b[icol] = swap;
if (info != 0) {
cout << "gaussj: Singular Matrix (dgetrf info=" << info << ")" << endl;
delete[] ipiv;
delete[] a_copy;
return 1;
} }
// Then compute inverse from LU factorization indxr[i] = irow;
info = LAPACKE_dgetri(LAPACK_ROW_MAJOR, n, a, n, ipiv); indxc[i] = icol;
if (info != 0) { if (a[icol * n + icol] == 0.0)
cout << "gaussj: Singular Matrix (dgetri info=" << info << ")" << endl; {
delete[] ipiv; cout << "gaussj: Singular Matrix-2" << endl;
delete[] a_copy; for (int ii = 0; ii < n; ii++)
return 1; {
for (int jj = 0; jj < n; jj++)
cout << a[ii * n + jj] << " ";
cout << endl;
}
return 1; // error return
} }
pivinv = 1.0 / a[icol * n + icol];
a[icol * n + icol] = 1.0;
for (l = 0; l < n; l++)
a[icol * n + l] *= pivinv;
b[icol] *= pivinv;
for (ll = 0; ll < n; ll++)
if (ll != icol)
{
dum = a[ll * n + icol];
a[ll * n + icol] = 0.0;
for (l = 0; l < n; l++)
a[ll * n + l] -= a[icol * n + l] * dum;
b[ll] -= b[icol] * dum;
}
}
for (l = n - 1; l >= 0; l--)
{
if (indxr[l] != indxc[l])
for (k = 0; k < n; k++)
{
swap = a[k * n + indxr[l]];
a[k * n + indxr[l]] = a[k * n + indxc[l]];
a[k * n + indxc[l]] = swap;
}
}
delete[] indxc;
delete[] indxr;
delete[] ipiv; delete[] ipiv;
delete[] a_copy;
return 0; return 0;
} }

View File

@@ -512,10 +512,11 @@
IMPLICIT DOUBLE PRECISION (A-H,O-Z) IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION V(N),W(N) DIMENSION V(N),W(N)
! SUBROUTINE TO COMPUTE DOUBLE PRECISION VECTOR DOT PRODUCT. ! SUBROUTINE TO COMPUTE DOUBLE PRECISION VECTOR DOT PRODUCT.
! Optimized using Intel oneMKL BLAS ddot
! Mathematical equivalence: DGVV = sum_{i=1}^{N} V(i)*W(i)
DOUBLE PRECISION, EXTERNAL :: DDOT SUM = 0.0D0
DGVV = DDOT(N, V, 1, W, 1) DO 10 I = 1,N
SUM = SUM + V(I)*W(I)
10 CONTINUE
DGVV = SUM
RETURN RETURN
END END

View File

@@ -6,6 +6,101 @@
! Vertex or Cell is distinguished in routine symmetry_bd which locates in ! Vertex or Cell is distinguished in routine symmetry_bd which locates in
! file "fmisc.f90" ! file "fmisc.f90"
#if (ghost_width == 2)
! second order code
!------------------------------------------------------------------------------------------------------------------------------
!usual type Kreiss-Oliger type numerical dissipation
!We support cell center only
! (D_+D_-)^2 =
! f(i-2) - 4 f(i-1) + 6 f(i) - 4 f(i+1) + f(i+2)
! ------------------------------------------------------
! dx^4
!------------------------------------------------------------------------------------------------------------------------------
! do not add dissipation near boundary
subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
implicit none
! argument variables
integer,intent(in) :: Symmetry
integer,dimension(3),intent(in)::ex
real*8, dimension(1:3), intent(in) :: SoA
double precision,intent(in),dimension(ex(1))::X
double precision,intent(in),dimension(ex(2))::Y
double precision,intent(in),dimension(ex(3))::Z
double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
real*8,intent(in) :: eps
!~~~~~~ other variables
real*8 :: dX,dY,dZ
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8,parameter :: cof = 1.6d1 ! 2^4
real*8, parameter :: F4=4.d0,F6=6.d0
integer::i,j,k
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
call symmetry_bd(2,ex,f,fh,SoA)
! f(i-2) - 4 f(i-1) + 6 f(i) - 4 f(i+1) + f(i+2)
! ------------------------------------------------------
! dx^4
! note the sign (-1)^r-1, now r=2
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i-2 >= imin .and. i+2 <= imax .and. &
j-2 >= jmin .and. j+2 <= jmax .and. &
k-2 >= kmin .and. k+2 <= kmax) then
! x direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dX/cof * ( &
(fh(i-2,j,k)+fh(i+2,j,k)) &
- F4 * (fh(i-1,j,k)+fh(i+1,j,k)) &
+ F6 * fh(i,j,k) )
! y direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dY/cof * ( &
(fh(i,j-2,k)+fh(i,j+2,k)) &
- F4 * (fh(i,j-1,k)+fh(i,j+1,k)) &
+ F6 * fh(i,j,k) )
! z direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dZ/cof * ( &
(fh(i,j,k-2)+fh(i,j,k+2)) &
- F4 * (fh(i,j,k-1)+fh(i,j,k+1)) &
+ F6 * fh(i,j,k) )
endif
enddo
enddo
enddo
return
end subroutine kodis
#elif (ghost_width == 3)
! fourth order code ! fourth order code
!--------------------------------------------------------------------------------------------- !---------------------------------------------------------------------------------------------
@@ -61,7 +156,7 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2 if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2
if(Symmetry == OCTANT .and. dabs(X(1)) < dX) imin = -2 if(Symmetry == OCTANT .and. dabs(X(1)) < dX) imin = -2
if(Symmetry == OCTANT .and. dabs(Y(1)) < dY) jmin = -2 if(Symmetry == OCTANT .and. dabs(Y(1)) < dY) jmin = -2
!print*,'imin,jmin,kmin=',imin,jmin,kmin
call symmetry_bd(3,ex,f,fh,SoA) call symmetry_bd(3,ex,f,fh,SoA)
do k=1,ex(3) do k=1,ex(3)
@@ -71,7 +166,28 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
if(i-3 >= imin .and. i+3 <= imax .and. & if(i-3 >= imin .and. i+3 <= imax .and. &
j-3 >= jmin .and. j+3 <= jmax .and. & j-3 >= jmin .and. j+3 <= jmax .and. &
k-3 >= kmin .and. k+3 <= kmax) then k-3 >= kmin .and. k+3 <= kmax) then
#if 0
! x direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dX/cof * ( &
(fh(i-3,j,k)+fh(i+3,j,k)) - &
SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
TWT* fh(i,j,k) )
! y direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dY/cof * ( &
(fh(i,j-3,k)+fh(i,j+3,k)) - &
SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
TWT* fh(i,j,k) )
! z direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dZ/cof * ( &
(fh(i,j,k-3)+fh(i,j,k+3)) - &
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
TWT* fh(i,j,k) )
#else
! calculation order if important ? ! calculation order if important ?
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( & f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( &
(fh(i-3,j,k)+fh(i+3,j,k)) - & (fh(i-3,j,k)+fh(i+3,j,k)) - &
@@ -88,7 +204,7 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + & SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - & FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
TWT* fh(i,j,k) )/dZ ) TWT* fh(i,j,k) )/dZ )
#endif
endif endif
enddo enddo
@@ -99,6 +215,218 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
end subroutine kodis end subroutine kodis
#elif (ghost_width == 4)
! sixth order code
!------------------------------------------------------------------------------------------------------------------------------
!usual type Kreiss-Oliger type numerical dissipation
!We support cell center only
! (D_+D_-)^4 =
! f(i-4) - 8 f(i-3) + 28 f(i-2) - 56 f(i-1) + 70 f(i) - 56 f(i+1) + 28 f(i+2) - 8 f(i+3) + f(i+4)
! ----------------------------------------------------------------------------------------------------------
! dx^8
!------------------------------------------------------------------------------------------------------------------------------
! do not add dissipation near boundary
subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
implicit none
! argument variables
integer,intent(in) :: Symmetry
integer,dimension(3),intent(in)::ex
real*8, dimension(1:3), intent(in) :: SoA
double precision,intent(in),dimension(ex(1))::X
double precision,intent(in),dimension(ex(2))::Y
double precision,intent(in),dimension(ex(3))::Z
double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
real*8,intent(in) :: eps
!~~~~~~ other variables
real*8 :: dX,dY,dZ
real*8,dimension(-3:ex(1),-3:ex(2),-3:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8,parameter :: cof = 2.56d2 ! 2^8
real*8, parameter :: F8=8.d0,F28=2.8d1,F56=5.6d1,F70=7.d1
integer::i,j,k
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -3
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -3
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -3
call symmetry_bd(4,ex,f,fh,SoA)
! f(i-4) - 8 f(i-3) + 28 f(i-2) - 56 f(i-1) + 70 f(i) - 56 f(i+1) + 28 f(i+2) - 8 f(i+3) + f(i+4)
! ----------------------------------------------------------------------------------------------------------
! dx^8
! note the sign (-1)^r-1, now r=4
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i>imin+3 .and. i < imax-3 .and. &
j>jmin+3 .and. j < jmax-3 .and. &
k>kmin+3 .and. k < kmax-3) then
! x direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dX/cof * ( &
(fh(i-4,j,k)+fh(i+4,j,k)) &
- F8 * (fh(i-3,j,k)+fh(i+3,j,k)) &
+F28 * (fh(i-2,j,k)+fh(i+2,j,k)) &
-F56 * (fh(i-1,j,k)+fh(i+1,j,k)) &
+F70 * fh(i,j,k) )
! y direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dY/cof * ( &
(fh(i,j-4,k)+fh(i,j+4,k)) &
- F8 * (fh(i,j-3,k)+fh(i,j+3,k)) &
+F28 * (fh(i,j-2,k)+fh(i,j+2,k)) &
-F56 * (fh(i,j-1,k)+fh(i,j+1,k)) &
+F70 * fh(i,j,k) )
! z direction
f_rhs(i,j,k) = f_rhs(i,j,k) - eps/dZ/cof * ( &
(fh(i,j,k-4)+fh(i,j,k+4)) &
- F8 * (fh(i,j,k-3)+fh(i,j,k+3)) &
+F28 * (fh(i,j,k-2)+fh(i,j,k+2)) &
-F56 * (fh(i,j,k-1)+fh(i,j,k+1)) &
+F70 * fh(i,j,k) )
endif
enddo
enddo
enddo
return
end subroutine kodis
#elif (ghost_width == 5)
! eighth order code
!------------------------------------------------------------------------------------------------------------------------------
!usual type Kreiss-Oliger type numerical dissipation
!We support cell center only
! Note the notation D_+ and D_- [P240 of B. Gustafsson, H.-O. Kreiss, and J. Oliger, Time
! Dependent Problems and Difference Methods (Wiley, New York, 1995).]
! D_+ = (f(i+1) - f(i))/h
! D_- = (f(i) - f(i-1))/h
! then we have D_+D_- = D_-D_+ = (f(i+1) - 2f(i) + f(i-1))/h^2
! for nth order accurate finite difference code, we need r =n/2+1
! D_+^rD_-^r = (D_+D_-)^r
! following the tradiation of PRD 77, 024027 (BB's calibration paper, Eq.(64),
! correct some typo according to above book) :
! + eps*(-1)^(r-1)*h^(2r-1)/2^(2r)*(D_+D_-)^r
!
!
! this is for 8th order accurate finite difference scheme
! (D_+D_-)^5 =
! f(i-5) - 10 f(i-4) + 45 f(i-3) - 120 f(i-2) + 210 f(i-1) - 252 f(i) + 210 f(i+1) - 120 f(i+2) + 45 f(i+3) - 10 f(i+4) + f(i+5)
! -------------------------------------------------------------------------------------------------------------------------------
! dx^10
!---------------------------------------------------------------------------------------------------------------------------------
! do not add dissipation near boundary
subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
implicit none
! argument variables
integer,intent(in) :: Symmetry
integer,dimension(3),intent(in)::ex
real*8, dimension(1:3), intent(in) :: SoA
double precision,intent(in),dimension(ex(1))::X
double precision,intent(in),dimension(ex(2))::Y
double precision,intent(in),dimension(ex(3))::Z
double precision,intent(in),dimension(ex(1),ex(2),ex(3))::f
double precision,intent(inout),dimension(ex(1),ex(2),ex(3))::f_rhs
real*8,intent(in) :: eps
!~~~~~~ other variables
real*8 :: dX,dY,dZ
real*8,dimension(-4:ex(1),-4:ex(2),-4:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8,parameter :: cof = 1.024d3 ! 2^2r = 2^10
real*8, parameter :: F10=1.d1,F45=4.5d1,F120=1.2d2,F210=2.1d2,F252=2.52d2
integer::i,j,k
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -4
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -4
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -4
call symmetry_bd(5,ex,f,fh,SoA)
! f(i-5) - 10 f(i-4) + 45 f(i-3) - 120 f(i-2) + 210 f(i-1) - 252 f(i) + 210 f(i+1) - 120 f(i+2) + 45 f(i+3) - 10 f(i+4) + f(i+5)
! -------------------------------------------------------------------------------------------------------------------------------
! dx^10
! note the sign (-1)^r-1, now r=5
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i>imin+4 .and. i < imax-4 .and. &
j>jmin+4 .and. j < jmax-4 .and. &
k>kmin+4 .and. k < kmax-4) then
! x direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dX/cof * ( &
(fh(i-5,j,k)+fh(i+5,j,k)) &
- F10 * (fh(i-4,j,k)+fh(i+4,j,k)) &
+ F45 * (fh(i-3,j,k)+fh(i+3,j,k)) &
- F120* (fh(i-2,j,k)+fh(i+2,j,k)) &
+ F210* (fh(i-1,j,k)+fh(i+1,j,k)) &
- F252 * fh(i,j,k) )
! y direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dY/cof * ( &
(fh(i,j-5,k)+fh(i,j+5,k)) &
- F10 * (fh(i,j-4,k)+fh(i,j+4,k)) &
+ F45 * (fh(i,j-3,k)+fh(i,j+3,k)) &
- F120* (fh(i,j-2,k)+fh(i,j+2,k)) &
+ F210* (fh(i,j-1,k)+fh(i,j+1,k)) &
- F252 * fh(i,j,k) )
! z direction
f_rhs(i,j,k) = f_rhs(i,j,k) + eps/dZ/cof * ( &
(fh(i,j,k-5)+fh(i,j,k+5)) &
- F10 * (fh(i,j,k-4)+fh(i,j,k+4)) &
+ F45 * (fh(i,j,k-3)+fh(i,j,k+3)) &
- F120* (fh(i,j,k-2)+fh(i,j,k+2)) &
+ F210* (fh(i,j,k-1)+fh(i,j,k+1)) &
- F252 * fh(i,j,k) )
endif
enddo
enddo
enddo
return
end subroutine kodis
#endif

View File

@@ -7,7 +7,163 @@
! Vertex or Cell is distinguished in routine symmetry_bd which locates in ! Vertex or Cell is distinguished in routine symmetry_bd which locates in
! file "fmisc.f90" ! file "fmisc.f90"
#if (ghost_width == 2)
! second order code
!-----------------------------------------------------------------------------
! v
! D f = ------[ - 3 f + 4 f - f ]
! i 2dx i i+v i+2v
!
! where
!
! i
! |B |
! v = -----
! i
! B
!
!-----------------------------------------------------------------------------
subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
!~~~~~~> local variables:
! note index -1,0, so we have 2 extra points
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0,TWO=2.d0,THR=3.d0,FOUR=4.d0
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -1
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -1
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -1
call symmetry_bd(2,ex,f,fh,SoA)
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
! x direction
if(Sfx(i,j,k) >= ZEO)then
if( i+2 <= imax .and. i >= imin)then
! v
! D f = ------[ - 3 f + 4 f - f ]
! i 2dx i i+v i+2v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d2dx*(-THR*fh(i,j,k)+FOUR*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i >= imin)then
! v
! D f = ------[ - f + f ]
! i dx i i+v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d2dx*(-fh(i,j,k)+fh(i+1,j,k))
endif
elseif(Sfx(i,j,k) <= ZEO)then
if( i-2 >= imin .and. i <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d2dx*(-THR*fh(i,j,k)+FOUR*fh(i-1,j,k)-fh(i-2,j,k))
elseif(i-1 >= imin .and. i <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d2dx*(-fh(i,j,k)+fh(i-1,j,k))
endif
! set imax and imin 0
endif
! y direction
if(Sfy(i,j,k) >= ZEO)then
if( j+2 <= jmax .and. j >= jmin)then
! v
! D f = ------[ - 3 f + 4 f - f ]
! i 2dx i i+v i+2v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d2dy*(-THR*fh(i,j,k)+FOUR*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j >= jmin)then
! v
! D f = ------[ - f + f ]
! i dx i i+v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d2dy*(-fh(i,j,k)+fh(i,j+1,k))
endif
elseif(Sfy(i,j,k) <= ZEO)then
if( j-2 >= jmin .and. j <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d2dy*(-THR*fh(i,j,k)+FOUR*fh(i,j-1,k)-fh(i,j-2,k))
elseif(j-1 >= jmin .and. j <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d2dy*(-fh(i,j,k)+fh(i,j-1,k))
endif
! set jmin and jmax 0
endif
!! z direction
if(Sfz(i,j,k) >= ZEO)then
if( k+2 <= kmax .and. k >= kmin)then
! v
! D f = ------[ - 3 f + 4 f - f ]
! i 2dx i i+v i+2v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d2dz*(-THR*fh(i,j,k)+FOUR*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k >= kmin)then
! v
! D f = ------[ - f + f ]
! i dx i i+v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d2dz*(-fh(i,j,k)+fh(i,j,k+1))
endif
elseif(Sfz(i,j,k) <= ZEO)then
if( k-2 >= kmin .and. k <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d2dz*(-THR*fh(i,j,k)+FOUR*fh(i,j,k-1)-fh(i,j,k-2))
elseif(k-1 >= kmin .and. k <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d2dz*(-fh(i,j,k)+fh(i,j,k-1))
endif
! set kmin and kmax 0
endif
enddo
enddo
enddo
return
end subroutine lopsided
#elif (ghost_width == 3)
! fourth order code ! fourth order code
!----------------------------------------------------------------------------- !-----------------------------------------------------------------------------
@@ -80,7 +236,89 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
#if 0
!! old code
! x direction
if(Sfx(i,j,k) >= ZEO .and. i+3 <= imax .and. i-1 >= imin)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
-F6*fh(i+2,j,k)+ fh(i+3,j,k))
elseif(Sfx(i,j,k) <= ZEO .and. i-3 >= imin .and. i+1 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
-F6*fh(i-2,j,k)+ fh(i-3,j,k))
elseif(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i-1 >= imin)then
!
! - f(i-1) + f(i+1)
! fx(i) = --------------------------------
! 2 dx
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfx(i,j,k)*d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
! set imax and imin 0
endif
! y direction
if(Sfy(i,j,k) >= ZEO .and. j+3 <= jmax .and. j-1 >= jmin)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
-F6*fh(i,j+2,k)+ fh(i,j+3,k))
elseif(Sfy(i,j,k) <= ZEO .and. j-3 >= jmin .and. j+1 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
-F6*fh(i,j-2,k)+ fh(i,j-3,k))
elseif(j+2 <= jmax .and. j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfy(i,j,k)*d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
! set jmin and jmax 0
endif
!! z direction
if(Sfz(i,j,k) >= ZEO .and. k+3 <= kmax .and. k-1 >= kmin)then
! v
! D f = ------[ - 3f - 10f + 18f - 6f + f ]
! i 12dx i-v i i+v i+2v i+3v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
-F6*fh(i,j,k+2)+ fh(i,j,k+3))
elseif(Sfz(i,j,k) <= ZEO .and. k-3 >= kmin .and. k+1 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
-F6*fh(i,j,k-2)+ fh(i,j,k-3))
elseif(k+2 <= kmax .and. k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+Sfz(i,j,k)*d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
! set kmin and kmax 0
endif
#else
!! new code, 2012dec27, based on bam !! new code, 2012dec27, based on bam
! x direction ! x direction
if(Sfx(i,j,k) > ZEO)then if(Sfx(i,j,k) > ZEO)then
@@ -240,6 +478,7 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
! set kmax and kmin 0 ! set kmax and kmin 0
endif endif
endif endif
#endif
enddo enddo
enddo enddo
enddo enddo
@@ -247,3 +486,417 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
return return
end subroutine lopsided end subroutine lopsided
#elif (ghost_width == 4)
! sixth order code
! Compute advection terms in right hand sides of field equations
! v
! D f = ------[ 2f - 24f - 35f + 80f - 30f + 8f - f ]
! i 60dx i-2v i-v i i+v i+2v i+3v i+4v
!
! where
!
! i
! |B |
! v = -----
! i
! B
!
!-----------------------------------------------------------------------------
subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
!~~~~~~> local variables:
real*8,dimension(-3:ex(1),-3:ex(2),-3:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d60dx,d60dy,d60dz,d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
real*8, parameter :: TWO=2.d0,F24=2.4d1,F35=3.5d1,F80=8.d1,F30=3.d1,EIT=8.d0
real*8, parameter :: F9=9.d0,F45=4.5d1,F12=1.2d1
real*8, parameter :: F10=1.d1,F77=7.7d1,F150=1.5d2,F100=1.d2,F50=5.d1,F15=1.5d1
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d60dx = ONE/F60/dX
d60dy = ONE/F60/dY
d60dz = ONE/F60/dZ
d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -3
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -3
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -3
call symmetry_bd(4,ex,f,fh,SoA)
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
! x direction
if(Sfx(i,j,k) >= ZEO .and. i+4 <= imax .and. i-2 >= imin)then
! v
! D f = ------[ 2f - 24f - 35f + 80f - 30f + 8f - f ]
! i 60dx i-2v i-v i i+v i+2v i+3v i+4v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d60dx*(TWO*fh(i-2,j,k)-F24*fh(i-1,j,k)-F35*fh(i,j,k)+F80*fh(i+1,j,k) &
-F30*fh(i+2,j,k)+EIT*fh(i+3,j,k)- fh(i+4,j,k))
elseif(Sfx(i,j,k) >= ZEO .and. i+5 <= imax .and. i-1 >= imin)then
! v
! D f = ------[-10f - 77f + 150f - 100f + 50f -15f + 2f ]
! i 60dx i-v i i+v i+2v i+3v i+4v i+5v
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d60dx*(-F10*fh(i-1,j,k)-F77*fh(i ,j,k)+F150*fh(i+1,j,k)-F100*fh(i+2,j,k) &
+F50*fh(i+3,j,k)-F15*fh(i+4,j,k)+ TWO*fh(i+5,j,k))
elseif(Sfx(i,j,k) <= ZEO .and. i-4 >= imin .and. i+2 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d60dx*(TWO*fh(i+2,j,k)-F24*fh(i+1,j,k)-F35*fh(i,j,k)+F80*fh(i-1,j,k) &
-F30*fh(i-2,j,k)+EIT*fh(i-3,j,k)- fh(i-4,j,k))
elseif(Sfx(i,j,k) <= ZEO .and. i-5 >= imin .and. i+1 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d60dx*(-F10*fh(i+1,j,k)-F77*fh(i ,j,k)+F150*fh(i-1,j,k)-F100*fh(i-2,j,k) &
+F50*fh(i-3,j,k)-F15*fh(i-4,j,k)+ TWO*fh(i-5,j,k))
elseif(i+3 <= imax .and. i-3 >= imin)then
! - f(i-3) + 9 f(i-2) - 45 f(i-1) + 45 f(i+1) - 9 f(i+2) + f(i+3)
! fx(i) = -----------------------------------------------------------------
! 60 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d60dx*(-fh(i-3,j,k)+F9*fh(i-2,j,k)-F45*fh(i-1,j,k)+F45*fh(i+1,j,k)-F9*fh(i+2,j,k)+fh(i+3,j,k))
elseif(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i-1 >= imin)then
!
! - f(i-1) + f(i+1)
! fx(i) = --------------------------------
! 2 dx
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfx(i,j,k)*d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
! set imax and imin 0
endif
! y direction
if(Sfy(i,j,k) >= ZEO .and. j+4 <= jmax .and. j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d60dy*(TWO*fh(i,j-2,k)-F24*fh(i,j-1,k)-F35*fh(i,j,k)+F80*fh(i,j+1,k) &
-F30*fh(i,j+2,k)+EIT*fh(i,j+3,k)- fh(i,j+4,k))
elseif(Sfy(i,j,k) >= ZEO .and. j+5 <= jmax .and. j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d60dy*(-F10*fh(i,j-1,k)-F77*fh(i,j ,k)+F150*fh(i,j+1,k)-F100*fh(i,j+2,k) &
+F50*fh(i,j+3,k)-F15*fh(i,j+4,k)+ TWO*fh(i,j+5,k))
elseif(Sfy(i,j,k) <= ZEO .and. j-4 >= jmin .and. j+2 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d60dy*(TWO*fh(i,j+2,k)-F24*fh(i,j+1,k)-F35*fh(i,j,k)+F80*fh(i,j-1,k) &
-F30*fh(i,j-2,k)+EIT*fh(i,j-3,k)- fh(i,j-4,k))
elseif(Sfy(i,j,k) <= ZEO .and. j-5 >= jmin .and. j+1 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d60dy*(-F10*fh(i,j+1,k)-F77*fh(i,j ,k)+F150*fh(i,j-1,k)-F100*fh(i,j-2,k) &
+F50*fh(i,j-3,k)-F15*fh(i,j-4,k)+ TWO*fh(i,j-5,k))
elseif(j+3 <= jmax .and. j-3 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d60dy*(-fh(i,j-3,k)+F9*fh(i,j-2,k)-F45*fh(i,j-1,k)+F45*fh(i,j+1,k)-F9*fh(i,j+2,k)+fh(i,j+3,k))
elseif(j+2 <= jmax .and. j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfy(i,j,k)*d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
! set jmin and jmax 0
endif
!! z direction
if(Sfz(i,j,k) >= ZEO .and. k+4 <= kmax .and. k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d60dz*(TWO*fh(i,j,k-2)-F24*fh(i,j,k-1)-F35*fh(i,j,k)+F80*fh(i,j,k+1) &
-F30*fh(i,j,k+2)+EIT*fh(i,j,k+3)- fh(i,j,k+4))
elseif(Sfz(i,j,k) >= ZEO .and. k+5 <= kmax .and. k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d60dz*(-F10*fh(i,j,k-1)-F77*fh(i,j,k )+F150*fh(i,j,k+1)-F100*fh(i,j,k+2) &
+F50*fh(i,j,k+3)-F15*fh(i,j,k+4)+ TWO*fh(i,j,k+5))
elseif(Sfz(i,j,k) <= ZEO .and. k-4 >= kmin .and. k+2 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d60dz*(TWO*fh(i,j,k+2)-F24*fh(i,j,k+1)-F35*fh(i,j,k)+F80*fh(i,j,k-1) &
-F30*fh(i,j,k-2)+EIT*fh(i,j,k-3)- fh(i,j,k-4))
elseif(Sfz(i,j,k) <= ZEO .and. k-5 >= kmin .and. k+1 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d60dz*(-F10*fh(i,j,k+1)-F77*fh(i,j,k )+F150*fh(i,j,k-1)-F100*fh(i,j,k-2) &
+F50*fh(i,j,k-3)-F15*fh(i,j,k-4)+ TWO*fh(i,j,k-5))
elseif(k+3 <= kmax .and. k-3 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d60dz*(-fh(i,j,k-3)+F9*fh(i,j,k-2)-F45*fh(i,j,k-1)+F45*fh(i,j,k+1)-F9*fh(i,j,k+2)+fh(i,j,k+3))
elseif(k+2 <= kmax .and. k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+Sfz(i,j,k)*d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
! set kmin and kmax 0
endif
enddo
enddo
enddo
return
end subroutine lopsided
#elif (ghost_width == 5)
! eighth order code
!-----------------------------------------------------------------------------
! PRD 77, 024034 (2008)
! Compute advection terms in right hand sides of field equations
! v [ - 5 f(i-3v) + 60 f(i-2v) - 420 f(i-v) - 378 f(i) + 1050 f(i+v) - 420 f(i+2v) + 140 f(i+3v) - 30 f(i+4v) + 3 f(i+5v)]
! D f = --------------------------------------------------------------------------------------------------------------------------
! i 840 dx
!
! where
!
! i
! |B |
! v = -----
! i
! B
!
!-----------------------------------------------------------------------------
subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3),Symmetry
real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3))
real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz
real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs
real*8,dimension(3),intent(in) ::SoA
!~~~~~~> local variables:
real*8,dimension(-4:ex(1),-4:ex(2),-4:ex(3)) :: fh
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: dX,dY,dZ
real*8 :: d840dx,d840dy,d840dz,d60dx,d60dy,d60dz,d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
real*8, parameter :: TWO=2.d0,F30=3.d1,EIT=8.d0
real*8, parameter :: F9=9.d0,F45=4.5d1,F12=1.2d1,F140=1.4d2,THR=3.d0
real*8, parameter :: F840=8.4d2,F5=5.d0,F420=4.2d2,F378=3.78d2,F1050=1.05d3
real*8, parameter :: F32=3.2d1,F168=1.68d2,F672=6.72d2
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
dX = X(2)-X(1)
dY = Y(2)-Y(1)
dZ = Z(2)-Z(1)
d840dx = ONE/F840/dX
d840dy = ONE/F840/dY
d840dz = ONE/F840/dZ
d60dx = ONE/F60/dX
d60dy = ONE/F60/dY
d60dz = ONE/F60/dZ
d12dx = ONE/F12/dX
d12dy = ONE/F12/dY
d12dz = ONE/F12/dZ
d2dx = ONE/TWO/dX
d2dy = ONE/TWO/dY
d2dz = ONE/TWO/dZ
imax = ex(1)
jmax = ex(2)
kmax = ex(3)
imin = 1
jmin = 1
kmin = 1
if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -4
if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -4
if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -4
call symmetry_bd(5,ex,f,fh,SoA)
! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also
do k=1,ex(3)-1
do j=1,ex(2)-1
do i=1,ex(1)-1
! x direction
if(Sfx(i,j,k) >= ZEO .and. i+5 <= imax .and. i-3 >= imin)then
! v [ - 5 f(i-3v) + 60 f(i-2v) - 420 f(i-v) - 378 f(i) + 1050 f(i+v) - 420 f(i+2v) + 140 f(i+3v) - 30 f(i+4v) + 3 f(i+5v)]
! D f = --------------------------------------------------------------------------------------------------------------------------
! i 840 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d840dx*(-F5*fh(i-3,j,k)+F60 *fh(i-2,j,k)-F420*fh(i-1,j,k)-F378*fh(i ,j,k) &
+F1050*fh(i+1,j,k)-F420*fh(i+2,j,k)+F140*fh(i+3,j,k)-F30 *fh(i+4,j,k)+THR*fh(i+5,j,k))
elseif(Sfx(i,j,k) <= ZEO .and. i-5 >= imin .and. i+3 <= imax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfx(i,j,k)*d840dx*(-F5*fh(i+3,j,k)+F60 *fh(i+2,j,k)-F420*fh(i+1,j,k)-F378*fh(i ,j,k) &
+F1050*fh(i-1,j,k)-F420*fh(i-2,j,k)+F140*fh(i-3,j,k)- F30*fh(i-4,j,k)+THR*fh(i-5,j,k))
elseif(i+4 <= imax .and. i-4 >= imin)then
! 3 f(i-4) - 32 f(i-3) + 168 f(i-2) - 672 f(i-1) + 672 f(i+1) - 168 f(i+2) + 32 f(i+3) - 3 f(i+4)
! fx(i) = -------------------------------------------------------------------------------------------------
! 840 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d840dx*( THR*fh(i-4,j,k)-F32 *fh(i-3,j,k)+F168*fh(i-2,j,k)-F672*fh(i-1,j,k)+ &
F672*fh(i+1,j,k)-F168*fh(i+2,j,k)+F32 *fh(i+3,j,k)-THR *fh(i+4,j,k))
elseif(i+3 <= imax .and. i-3 >= imin)then
! - f(i-3) + 9 f(i-2) - 45 f(i-1) + 45 f(i+1) - 9 f(i+2) + f(i+3)
! fx(i) = -----------------------------------------------------------------
! 60 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d60dx*(-fh(i-3,j,k)+F9*fh(i-2,j,k)-F45*fh(i-1,j,k)+F45*fh(i+1,j,k)-F9*fh(i+2,j,k)+fh(i+3,j,k))
elseif(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
! fx(i) = ---------------------------------------------
! 12 dx
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
elseif(i+1 <= imax .and. i-1 >= imin)then
!
! - f(i-1) + f(i+1)
! fx(i) = --------------------------------
! 2 dx
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfx(i,j,k)*d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
! set imax and imin 0
endif
! y direction
if(Sfy(i,j,k) >= ZEO .and. j+5 <= jmax .and. j-3 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d840dy*(-F5*fh(i,j-3,k)+F60 *fh(i,j-2,k)-F420*fh(i,j-1,k)-F378*fh(i,j ,k) &
+F1050*fh(i,j+1,k)-F420*fh(i,j+2,k)+F140*fh(i,j+3,k)-F30 *fh(i,j+4,k)+THR*fh(i,j+5,k))
elseif(Sfy(i,j,k) <= ZEO .and. j-5 >= jmin .and. j+3 <= jmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfy(i,j,k)*d840dy*(-F5*fh(i,j+3,k)+F60 *fh(i,j+2,k)-F420*fh(i,j+1,k)-F378*fh(i,j ,k) &
+F1050*fh(i,j-1,k)-F420*fh(i,j-2,k)+F140*fh(i,j-3,k)- F30*fh(i,j-4,k)+THR*fh(i,j-5,k))
elseif(j+4 <= jmax .and. j-4 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d840dy*( THR*fh(i,j-4,k)-F32 *fh(i,j-3,k)+F168*fh(i,j-2,k)-F672*fh(i,j-1,k)+ &
F672*fh(i,j+1,k)-F168*fh(i,j+2,k)+F32 *fh(i,j+3,k)-THR *fh(i,j+4,k))
elseif(j+3 <= jmax .and. j-3 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d60dy*(-fh(i,j-3,k)+F9*fh(i,j-2,k)-F45*fh(i,j-1,k)+F45*fh(i,j+1,k)-F9*fh(i,j+2,k)+fh(i,j+3,k))
elseif(j+2 <= jmax .and. j-2 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
elseif(j+1 <= jmax .and. j-1 >= jmin)then
f_rhs(i,j,k)=f_rhs(i,j,k) + Sfy(i,j,k)*d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
! set jmin and jmax 0
endif
!! z direction
if(Sfz(i,j,k) >= ZEO .and. k+5 <= kmax .and. k-3 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d840dz*(-F5*fh(i,j,k-3)+F60 *fh(i,j,k-2)-F420*fh(i,j,k-1)-F378*fh(i,j,k ) &
+F1050*fh(i,j,k+1)-F420*fh(i,j,k+2)+F140*fh(i,j,k+3)-F30 *fh(i,j,k+4)+THR*fh(i,j,k+5))
elseif(Sfz(i,j,k) <= ZEO .and. k-5 >= kmin .and. k+3 <= kmax)then
f_rhs(i,j,k)=f_rhs(i,j,k)- &
Sfz(i,j,k)*d840dz*(-F5*fh(i,j,k+3)+F60 *fh(i,j,k+2)-F420*fh(i,j,k+1)-F378*fh(i,j,k ) &
+F1050*fh(i,j,k-1)-F420*fh(i,j,k-2)+F140*fh(i,j,k-3)- F30*fh(i,j,k-4)+THR*fh(i,j,k-5))
elseif(k+4 <= kmax .and. k-4 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d840dz*( THR*fh(i,j,k-4)-F32 *fh(i,j,k-3)+F168*fh(i,j,k-2)-F672*fh(i,j,k-1)+ &
F672*fh(i,j,k+1)-F168*fh(i,j,k+2)+F32 *fh(i,j,k+3)-THR *fh(i,j,k+4))
elseif(k+3 <= kmax .and. k-3 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d60dz*(-fh(i,j,k-3)+F9*fh(i,j,k-2)-F45*fh(i,j,k-1)+F45*fh(i,j,k+1)-F9*fh(i,j,k+2)+fh(i,j,k+3))
elseif(k+2 <= kmax .and. k-2 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+ &
Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
elseif(k+1 <= kmax .and. k-1 >= kmin)then
f_rhs(i,j,k)=f_rhs(i,j,k)+Sfz(i,j,k)*d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
! set kmin and kmax 0
endif
enddo
enddo
enddo
return
end subroutine lopsided
#endif

View File

@@ -2,7 +2,7 @@
#ifndef MICRODEF_H #ifndef MICRODEF_H
#define MICRODEF_H #define MICRODEF_H
#include "macrodef.fh" #include "microdef.fh"
// application parameters // application parameters

View File

@@ -16,12 +16,6 @@ include makefile.inc
.cu.o: .cu.o:
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
TwoPunctures.o: TwoPunctures.C
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
TwoPunctureABE.o: TwoPunctureABE.C
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
# Input files # Input files
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
cgh.o bssn_class.o surface_integral.o ShellPatch.o\ cgh.o bssn_class.o surface_integral.o ShellPatch.o\
@@ -102,7 +96,7 @@ ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS) $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
TwoPunctureABE: $(TwoPunctureFILES) TwoPunctureABE: $(TwoPunctureFILES)
$(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS) $(CLINKER) $(CXXAPPFLAGS) -o $@ $(TwoPunctureFILES) $(LDLIBS)
clean: clean:
rm *.o ABE ABEGPU TwoPunctureABE make.log -f rm *.o ABE ABEGPU TwoPunctureABE make.log -f

View File

@@ -15,10 +15,11 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible) ## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
## -fp-model fast=2: Aggressive floating-point optimizations ## -fp-model fast=2: Aggressive floating-point optimizations
## -fma: Enable fused multiply-add instructions ## -fma: Enable fused multiply-add instructions
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ ## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma \
-Dfortran3 -Dnewc -I${MKLROOT}/include -Dfortran3 -Dnewc -I${MKLROOT}/include
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ f90appflags = -O3 -xHost -fp-model fast=2 -fma \
-align array64byte -fpp -I${MKLROOT}/include -fpp -I${MKLROOT}/include
f90 = ifx f90 = ifx
f77 = ifx f77 = ifx
CXX = icpx CXX = icpx
@@ -29,3 +30,4 @@ Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc #CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc

View File

@@ -10,17 +10,6 @@
import AMSS_NCKU_Input as input_data import AMSS_NCKU_Input as input_data
import subprocess import subprocess
import time
## CPU core binding configuration using taskset
## taskset ensures all child processes inherit the CPU affinity mask
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
NUMACTL_CPU_BIND = "taskset -c 0-111"
## Build parallelism configuration
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
## Set make -j to utilize available cores for faster builds
BUILD_JOBS = 104
################################################################## ##################################################################
@@ -37,11 +26,11 @@ def makefile_ABE():
print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " ) print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " )
print( ) print( )
## Build command with CPU binding to nohz_full cores ## Build command
if (input_data.GPU_Calculation == "no"): if (input_data.GPU_Calculation == "no"):
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE" makefile_command = "make -j4" + " ABE"
elif (input_data.GPU_Calculation == "yes"): elif (input_data.GPU_Calculation == "yes"):
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU" makefile_command = "make -j4" + " ABEGPU"
else: else:
print( " CPU/GPU numerical calculation setting is wrong " ) print( " CPU/GPU numerical calculation setting is wrong " )
print( ) print( )
@@ -78,8 +67,8 @@ def makefile_TwoPunctureABE():
print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " ) print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
print( ) print( )
## Build command with CPU binding to nohz_full cores ## Build command
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE" makefile_command = "make" + " TwoPunctureABE"
## Execute the command with subprocess.Popen and stream output ## Execute the command with subprocess.Popen and stream output
makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
@@ -116,10 +105,10 @@ def run_ABE():
## Define the command to run; cast other values to strings as needed ## Define the command to run; cast other values to strings as needed
if (input_data.GPU_Calculation == "no"): if (input_data.GPU_Calculation == "no"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE" mpi_command = "mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
mpi_command_outfile = "ABE_out.log" mpi_command_outfile = "ABE_out.log"
elif (input_data.GPU_Calculation == "yes"): elif (input_data.GPU_Calculation == "yes"):
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU" mpi_command = "mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
mpi_command_outfile = "ABEGPU_out.log" mpi_command_outfile = "ABEGPU_out.log"
## Execute the MPI command and stream output ## Execute the MPI command and stream output
@@ -152,13 +141,13 @@ def run_ABE():
## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE ## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE
def run_TwoPunctureABE(): def run_TwoPunctureABE():
tp_time1=time.time()
print( ) print( )
print( " Running the AMSS-NCKU executable file TwoPunctureABE " ) print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
print( ) print( )
## Define the command to run ## Define the command to run
TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE" TwoPuncture_command = "./TwoPunctureABE"
TwoPuncture_command_outfile = "TwoPunctureABE_out.log" TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
## Execute the command with subprocess.Popen and stream output ## Execute the command with subprocess.Popen and stream output
@@ -179,9 +168,7 @@ def run_TwoPunctureABE():
print( ) print( )
print( " The TwoPunctureABE simulation is finished " ) print( " The TwoPunctureABE simulation is finished " )
print( ) print( )
tp_time2=time.time()
et=tp_time2-tp_time1
print(f"Used time: {et}")
return return
################################################################## ##################################################################