Compare commits

..

6 Commits

12 changed files with 275 additions and 561 deletions

View File

@@ -13,17 +13,14 @@ import numpy
## Setting MPI processes and the output file directory
File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long
MPI_processes = 64 ## number of mpi processes used in the simulation
OMP_Threads = 3 ## number of OpenMP threads used by each MPI process
MPI_hosts = ["localhost", "192.168.20.102"] ## MPI hosts for multi-node runs
MPI_processes_per_node = 32 ## MPI ranks launched on each node in MPI_hosts
GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
CPU_Part = 1.0
File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long
MPI_processes = 64 ## number of mpi processes used in the simulation
GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
CPU_Part = 1.0
GPU_Part = 0.0
#################################################
@@ -53,7 +50,7 @@ Check_Time = 100.0
Dump_Time = 100.0 ## time inteval dT for dumping binary data
D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT'
Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT"
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number
Courant_Factor = 0.5 ## Courant Factor
Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength

View File

@@ -2034,9 +2034,9 @@ void bssn_class::Read_Ansorg()
//================================================================================================
void bssn_class::Evolve(int Steps)
{
double prev_clock = 0.0, curr_clock = 0.0;
void bssn_class::Evolve(int Steps)
{
clock_t prev_clock, curr_clock;
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
LastAnas = 0;
#if 0
@@ -2145,14 +2145,14 @@ void bssn_class::Evolve(int Steps)
GH->settrfls(trfls);
for (int ncount = 1; ncount < Steps + 1; ncount++)
{
for (int ncount = 1; ncount < Steps + 1; ncount++)
{
// special for large mass ratio consideration
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
// { GH->levels=GH->movls; }
if (myrank == 0)
curr_clock = MPI_Wtime();
if (myrank == 0)
curr_clock = clock();
#if (PSTR == 0)
RecursiveStep(0);
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
@@ -2205,16 +2205,16 @@ void bssn_class::Evolve(int Steps)
}
}
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = MPI_Wtime();
cout << endl;
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
<< " Computer used " << (curr_clock - prev_clock)
<< " seconds! " << endl;
// cout << endl;
}
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << endl;
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
<< " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
// cout << endl;
}
if (PhysTime >= TotalTime)
break;

View File

@@ -1891,7 +1891,7 @@ void bssn_class::Read_Ansorg()
void bssn_class::Evolve(int Steps)
{
double prev_clock = 0.0, curr_clock = 0.0;
clock_t prev_clock, curr_clock;
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
LastAnas = 0;
#if 0
@@ -2035,12 +2035,10 @@ void bssn_class::Evolve(int Steps)
GH->settrfls(trfls);
for (int ncount = 1; ncount < Steps + 1; ncount++)
{
if (myrank == 0)
curr_clock = MPI_Wtime();
cout << "Before Step: " << ncount << " My Rank: " << myrank
<< " takes " << MPI_Wtime() - beg_time << " seconds!" << endl;
for (int ncount = 1; ncount < Steps + 1; ncount++)
{
cout << "Before Step: " << ncount << " My Rank: " << myrank
<< " takes " << MPI_Wtime() - beg_time << " seconds!" << endl;
beg_time = MPI_Wtime();
#if (PSTR == 0)
RecursiveStep(0);
@@ -2097,10 +2095,10 @@ void bssn_class::Evolve(int Steps)
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = MPI_Wtime();
cout << "Timestep # " << ncount << ": integrating to time: " << PhysTime << endl;
cout << "used " << (curr_clock - prev_clock) << " seconds!" << endl;
prev_clock = curr_clock;
curr_clock = clock();
cout << "Timestep # " << ncount << ": integrating to time: " << PhysTime << endl;
cout << "used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" << endl;
}
if (PhysTime >= TotalTime)

View File

@@ -2,24 +2,6 @@
#include "bssn_rhs.h"
#include "share_func.h"
#include "tool.h"
#ifdef _OPENMP
#define BSSN_OMP_TASK_GROUP_BEGIN \
_Pragma("omp parallel") \
{ \
_Pragma("omp single nowait") \
{
#define BSSN_OMP_TASK_CALL(...) \
_Pragma("omp task") { __VA_ARGS__; }
#define BSSN_OMP_TASK_GROUP_END \
_Pragma("omp taskwait") \
} \
}
#else
#define BSSN_OMP_TASK_GROUP_BEGIN {
#define BSSN_OMP_TASK_CALL(...) { __VA_ARGS__; }
#define BSSN_OMP_TASK_GROUP_END }
#endif
// 0-based i,j,k
// #define IDX_F(i,j,k,nx,ny) ((i) + (j)*(nx) + (k)*(nx)*(ny))
// ex(1)=nx, ex(2)=ny, ex(3)=nz
@@ -126,20 +108,18 @@ int f_compute_rhs_bssn(int *ex, double &T,
chin1[i] = chi[i] + 1.0;
}
// 9ms //
BSSN_OMP_TASK_GROUP_BEGIN
BSSN_OMP_TASK_CALL(fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev))
BSSN_OMP_TASK_GROUP_END
fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev);
fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev);
fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev);
fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev);
fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev);
fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev);
fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
// 3ms //
for(int i=0;i<all;i+=1){
@@ -336,14 +316,15 @@ int f_compute_rhs_bssn(int *ex, double &T,
);
}
// 22.3ms //
BSSN_OMP_TASK_GROUP_BEGIN
BSSN_OMP_TASK_CALL(fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev))
BSSN_OMP_TASK_CALL(fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev))
BSSN_OMP_TASK_GROUP_END
fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev);
fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,
X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev);
fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev);
fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev);
// Fused: fxx/Gamxa + Gamma_rhs part 2 (2 loops -> 1)
for(int i=0;i<all;i+=1){
@@ -1082,32 +1063,30 @@ int f_compute_rhs_bssn(int *ex, double &T,
#endif
}
// advection + KO dissipation with shared symmetry buffer
BSSN_OMP_TASK_GROUP_BEGIN
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps))
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps))
BSSN_OMP_TASK_GROUP_END
lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps);
lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps);
lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps);
lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps);
lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps);
lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps);
lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps);
lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps);
lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps);
lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps);
lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps);
lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps);
lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps);
// 2ms //
if(co==0){
for (int i=0;i<all;i+=1) {
@@ -1154,67 +1133,65 @@ int f_compute_rhs_bssn(int *ex, double &T,
}
// 1ms //
BSSN_OMP_TASK_GROUP_BEGIN
BSSN_OMP_TASK_CALL(fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0))
BSSN_OMP_TASK_CALL(fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0))
BSSN_OMP_TASK_CALL(fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0))
BSSN_OMP_TASK_CALL(fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0))
BSSN_OMP_TASK_CALL(fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0))
BSSN_OMP_TASK_CALL(fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0))
BSSN_OMP_TASK_GROUP_END
// 7ms //
for (int i=0;i<all;i+=1) {
gxxx[i] = gxxx[i] - ( Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]
+ Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]) - chix[i]*Axx[i]/chin1[i];
gxyx[i] = gxyx[i] - ( Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]
+ Gamxxx[i] * Axy[i] + Gamyxx[i] * Ayy[i] + Gamzxx[i] * Ayz[i]) - chix[i]*Axy[i]/chin1[i];
gxzx[i] = gxzx[i] - ( Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]
+ Gamxxx[i] * Axz[i] + Gamyxx[i] * Ayz[i] + Gamzxx[i] * Azz[i]) - chix[i]*Axz[i]/chin1[i];
gyyx[i] = gyyx[i] - ( Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]
+ Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]) - chix[i]*Ayy[i]/chin1[i];
gyzx[i] = gyzx[i] - ( Gamxxz[i] * Axy[i] + Gamyxz[i] * Ayy[i] + Gamzxz[i] * Ayz[i]
+ Gamxxy[i] * Axz[i] + Gamyxy[i] * Ayz[i] + Gamzxy[i] * Azz[i]) - chix[i]*Ayz[i]/chin1[i];
gzzx[i] = gzzx[i] - ( Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]
+ Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]) - chix[i]*Azz[i]/chin1[i];
gxxy[i] = gxxy[i] - ( Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]
+ Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]) - chiy[i]*Axx[i]/chin1[i];
gxyy[i] = gxyy[i] - ( Gamxyy[i] * Axx[i] + Gamyyy[i] * Axy[i] + Gamzyy[i] * Axz[i]
+ Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]) - chiy[i]*Axy[i]/chin1[i];
gxzy[i] = gxzy[i] - ( Gamxyz[i] * Axx[i] + Gamyyz[i] * Axy[i] + Gamzyz[i] * Axz[i]
+ Gamxxy[i] * Axz[i] + Gamyxy[i] * Ayz[i] + Gamzxy[i] * Azz[i]) - chiy[i]*Axz[i]/chin1[i];
gyyy[i] = gyyy[i] - ( Gamxyy[i] * Axy[i] + Gamyyy[i] * Ayy[i] + Gamzyy[i] * Ayz[i]
+ Gamxyy[i] * Axy[i] + Gamyyy[i] * Ayy[i] + Gamzyy[i] * Ayz[i]) - chiy[i]*Ayy[i]/chin1[i];
gyzy[i] = gyzy[i] - ( Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]
+ Gamxyy[i] * Axz[i] + Gamyyy[i] * Ayz[i] + Gamzyy[i] * Azz[i]) - chiy[i]*Ayz[i]/chin1[i];
gzzy[i] = gzzy[i] - ( Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]
+ Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]) - chiy[i]*Azz[i]/chin1[i];
gxxz[i] = gxxz[i] - ( Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]
+ Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]) - chiz[i]*Axx[i]/chin1[i];
gxyz[i] = gxyz[i] - ( Gamxyz[i] * Axx[i] + Gamyyz[i] * Axy[i] + Gamzyz[i] * Axz[i]
+ Gamxxz[i] * Axy[i] + Gamyxz[i] * Ayy[i] + Gamzxz[i] * Ayz[i]) - chiz[i]*Axy[i]/chin1[i];
gxzz[i] = gxzz[i] - ( Gamxzz[i] * Axx[i] + Gamyzz[i] * Axy[i] + Gamzzz[i] * Axz[i]
+ Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]) - chiz[i]*Axz[i]/chin1[i];
gyyz[i] = gyyz[i] - ( Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]
+ Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]) - chiz[i]*Ayy[i]/chin1[i];
gyzz[i] = gyzz[i] - ( Gamxzz[i] * Axy[i] + Gamyzz[i] * Ayy[i] + Gamzzz[i] * Ayz[i]
+ Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]) - chiz[i]*Ayz[i]/chin1[i];
gzzz[i] = gzzz[i] - ( Gamxzz[i] * Axz[i] + Gamyzz[i] * Ayz[i] + Gamzzz[i] * Azz[i]
+ Gamxzz[i] * Axz[i] + Gamyzz[i] * Ayz[i] + Gamzzz[i] * Azz[i]) - chiz[i]*Azz[i]/chin1[i];
fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0);
fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0);
fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0);
fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
}
// 7ms //
for (int i=0;i<all;i+=1) {
gxxx[i] = gxxx[i] - ( Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]
+ Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]) - chix[i]*Axx[i]/chin1[i];
gxyx[i] = gxyx[i] - ( Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]
+ Gamxxx[i] * Axy[i] + Gamyxx[i] * Ayy[i] + Gamzxx[i] * Ayz[i]) - chix[i]*Axy[i]/chin1[i];
gxzx[i] = gxzx[i] - ( Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]
+ Gamxxx[i] * Axz[i] + Gamyxx[i] * Ayz[i] + Gamzxx[i] * Azz[i]) - chix[i]*Axz[i]/chin1[i];
gyyx[i] = gyyx[i] - ( Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]
+ Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]) - chix[i]*Ayy[i]/chin1[i];
gyzx[i] = gyzx[i] - ( Gamxxz[i] * Axy[i] + Gamyxz[i] * Ayy[i] + Gamzxz[i] * Ayz[i]
+ Gamxxy[i] * Axz[i] + Gamyxy[i] * Ayz[i] + Gamzxy[i] * Azz[i]) - chix[i]*Ayz[i]/chin1[i];
gzzx[i] = gzzx[i] - ( Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]
+ Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]) - chix[i]*Azz[i]/chin1[i];
gxxy[i] = gxxy[i] - ( Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]
+ Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]) - chiy[i]*Axx[i]/chin1[i];
gxyy[i] = gxyy[i] - ( Gamxyy[i] * Axx[i] + Gamyyy[i] * Axy[i] + Gamzyy[i] * Axz[i]
+ Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]) - chiy[i]*Axy[i]/chin1[i];
gxzy[i] = gxzy[i] - ( Gamxyz[i] * Axx[i] + Gamyyz[i] * Axy[i] + Gamzyz[i] * Axz[i]
+ Gamxxy[i] * Axz[i] + Gamyxy[i] * Ayz[i] + Gamzxy[i] * Azz[i]) - chiy[i]*Axz[i]/chin1[i];
gyyy[i] = gyyy[i] - ( Gamxyy[i] * Axy[i] + Gamyyy[i] * Ayy[i] + Gamzyy[i] * Ayz[i]
+ Gamxyy[i] * Axy[i] + Gamyyy[i] * Ayy[i] + Gamzyy[i] * Ayz[i]) - chiy[i]*Ayy[i]/chin1[i];
gyzy[i] = gyzy[i] - ( Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]
+ Gamxyy[i] * Axz[i] + Gamyyy[i] * Ayz[i] + Gamzyy[i] * Azz[i]) - chiy[i]*Ayz[i]/chin1[i];
gzzy[i] = gzzy[i] - ( Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]
+ Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]) - chiy[i]*Azz[i]/chin1[i];
gxxz[i] = gxxz[i] - ( Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]
+ Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]) - chiz[i]*Axx[i]/chin1[i];
gxyz[i] = gxyz[i] - ( Gamxyz[i] * Axx[i] + Gamyyz[i] * Axy[i] + Gamzyz[i] * Axz[i]
+ Gamxxz[i] * Axy[i] + Gamyxz[i] * Ayy[i] + Gamzxz[i] * Ayz[i]) - chiz[i]*Axy[i]/chin1[i];
gxzz[i] = gxzz[i] - ( Gamxzz[i] * Axx[i] + Gamyzz[i] * Axy[i] + Gamzzz[i] * Axz[i]
+ Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]) - chiz[i]*Axz[i]/chin1[i];
gyyz[i] = gyyz[i] - ( Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]
+ Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]) - chiz[i]*Ayy[i]/chin1[i];
gyzz[i] = gyzz[i] - ( Gamxzz[i] * Axy[i] + Gamyzz[i] * Ayy[i] + Gamzzz[i] * Ayz[i]
+ Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]) - chiz[i]*Ayz[i]/chin1[i];
gzzz[i] = gzzz[i] - ( Gamxzz[i] * Axz[i] + Gamyzz[i] * Ayz[i] + Gamzzz[i] * Azz[i]
+ Gamxzz[i] * Axz[i] + Gamyzz[i] * Ayz[i] + Gamzzz[i] * Azz[i]) - chiz[i]*Azz[i]/chin1[i];
movx_Res[i] = gupxx[i]*gxxx[i] + gupyy[i]*gxyy[i] + gupzz[i]*gxzz[i]
+ gupxy[i]*gxyx[i] + gupxz[i]*gxzx[i] + gupyz[i]*gxzy[i]
+ gupxy[i]*gxxy[i] + gupxz[i]*gxxz[i] + gupyz[i]*gxyz[i];
movy_Res[i] = gupxx[i]*gxyx[i] + gupyy[i]*gyyy[i] + gupzz[i]*gyzz[i]
+ gupxy[i]*gyyx[i] + gupxz[i]*gyzx[i] + gupyz[i]*gyzy[i]
+ gupxy[i]*gxyy[i] + gupxz[i]*gxyz[i] + gupyz[i]*gyyz[i];
movz_Res[i] = gupxx[i]*gxzx[i] + gupyy[i]*gyzy[i] + gupzz[i]*gzzz[i]
+ gupxy[i]*gyzx[i] + gupxz[i]*gzzx[i] + gupyz[i]*gzzy[i]
+ gupxy[i]*gxzy[i] + gupxz[i]*gxzz[i] + gupyz[i]*gyzz[i];
movx_Res[i] = gupxx[i]*gxxx[i] + gupyy[i]*gxyy[i] + gupzz[i]*gxzz[i]
+ gupxy[i]*gxyx[i] + gupxz[i]*gxzx[i] + gupyz[i]*gxzy[i]
+ gupxy[i]*gxxy[i] + gupxz[i]*gxxz[i] + gupyz[i]*gxyz[i];
movy_Res[i] = gupxx[i]*gxyx[i] + gupyy[i]*gyyy[i] + gupzz[i]*gyzz[i]
+ gupxy[i]*gyyx[i] + gupxz[i]*gyzx[i] + gupyz[i]*gyzy[i]
+ gupxy[i]*gxyy[i] + gupxz[i]*gxyz[i] + gupyz[i]*gyyz[i];
movz_Res[i] = gupxx[i]*gxzx[i] + gupyy[i]*gyzy[i] + gupzz[i]*gzzz[i]
+ gupxy[i]*gyzx[i] + gupxz[i]*gzzx[i] + gupyz[i]*gzzy[i]
+ gupxy[i]*gxzy[i] + gupxz[i]*gxzz[i] + gupyz[i]*gyzz[i];
movx_Res[i] = movx_Res[i] - F2o3*Kx[i] - F8*PI*Sx[i];
movy_Res[i] = movy_Res[i] - F2o3*Ky[i] - F8*PI*Sy[i];
movz_Res[i] = movz_Res[i] - F2o3*Kz[i] - F8*PI*Sz[i];
}
movx_Res[i] = movx_Res[i] - F2o3*Kx[i] - F8*PI*Sx[i];
movy_Res[i] = movy_Res[i] - F2o3*Ky[i] - F8*PI*Sy[i];
movz_Res[i] = movz_Res[i] - F2o3*Kz[i] - F8*PI*Sz[i];
}

View File

@@ -41,8 +41,8 @@ void fdderivs(const int ex[3],
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
static double *fh = NULL;
static size_t cap = 0;
if (fh_size > cap) {
free(fh);
@@ -71,131 +71,106 @@ void fdderivs(const int ex[3],
const double Fdxdz = F1o144 / (dX * dZ);
const double Fdydz = F1o144 / (dY * dZ);
/* 只清零不被主循环覆盖的边界面 */
{
/* 高边界k0=ex3-1 */
for (int j0 = 0; j0 < ex2; ++j0)
for (int i0 = 0; i0 < ex1; ++i0) {
const size_t p = idx_ex(i0, j0, ex3 - 1, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
/* 高边界j0=ex2-1 */
for (int k0 = 0; k0 < ex3 - 1; ++k0)
for (int i0 = 0; i0 < ex1; ++i0) {
const size_t p = idx_ex(i0, ex2 - 1, k0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
/* 高边界i0=ex1-1 */
for (int k0 = 0; k0 < ex3 - 1; ++k0)
for (int j0 = 0; j0 < ex2 - 1; ++j0) {
const size_t p = idx_ex(ex1 - 1, j0, k0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
/* 低边界:当二阶模板也不可用时,对应 i0/j0/k0=0 面 */
if (kminF == 1) {
for (int j0 = 0; j0 < ex2; ++j0)
for (int i0 = 0; i0 < ex1; ++i0) {
const size_t p = idx_ex(i0, j0, 0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
}
if (jminF == 1) {
for (int k0 = 0; k0 < ex3; ++k0)
for (int i0 = 0; i0 < ex1; ++i0) {
const size_t p = idx_ex(i0, 0, k0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
}
if (iminF == 1) {
for (int k0 = 0; k0 < ex3; ++k0)
for (int j0 = 0; j0 < ex2; ++j0) {
const size_t p = idx_ex(0, j0, k0, ex);
fxx[p]=ZEO; fyy[p]=ZEO; fzz[p]=ZEO;
fxy[p]=ZEO; fxz[p]=ZEO; fyz[p]=ZEO;
}
}
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fxx[p] = ZEO; fxy[p] = ZEO; fxz[p] = ZEO;
fyy[p] = ZEO; fyz[p] = ZEO; fzz[p] = ZEO;
}
/*
* 两段式:
* 1) 二阶可用区域先计算二阶模板
* 2) 高阶可用区域再覆盖四阶模板
*/
const int i2_lo = (iminF > 0) ? iminF : 0;
const int j2_lo = (jminF > 0) ? jminF : 0;
const int k2_lo = (kminF > 0) ? kminF : 0;
const int i2_hi = ex1 - 2;
const int j2_hi = ex2 - 2;
const int k2_hi = ex3 - 2;
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
const int i4_hi = ex1 - 3;
const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3;
/*
* Strategy A:
* Avoid redundant work in overlap of 2nd/4th-order regions.
* Only compute 2nd-order on shell points that are NOT overwritten by
* the 4th-order pass.
*/
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
if (has4 &&
i0 >= i4_lo && i0 <= i4_hi &&
j0 >= j4_lo && j0 <= j4_hi &&
k0 >= k4_lo && k0 <= k4_hi) {
continue;
}
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// Match Fortran (ghost_width=3, "for bam comparison") exactly:
// only compute when x/y/z all satisfy the same-order stencil at this point.
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
if ((iF + 2 <= imaxF && iF - 2 >= iminF) &&
(jF + 2 <= jmaxF && jF - 2 >= jminF) &&
(kF + 2 <= kmaxF && kF - 2 >= kminF)) {
fxx[p] = Fdxdx * (
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Fdydy * (
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Fdzdz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
fxy[p] = Fdxdy * (
(fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)] - F8 * fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)] +
F8 * fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)] - fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)])
- F8 * (fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)] - F8 * fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] +
F8 * fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] - fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)])
+ F8 * (fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)] - F8 * fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
F8 * fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)] - fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)])
- (fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)] - F8 * fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)] +
F8 * fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)] - fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)])
);
fxz[p] = Fdxdz * (
(fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)] - F8 * fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)] +
F8 * fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)] - fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)])
- F8 * (fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)] - F8 * fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] +
F8 * fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] - fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)])
+ F8 * (fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)] - F8 * fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
F8 * fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)] - fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)])
- (fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)] - F8 * fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)] +
F8 * fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)] - fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)])
);
fyz[p] = Fdydz * (
(fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)] - F8 * fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)] +
F8 * fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)] - fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)])
- F8 * (fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)] - F8 * fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] +
F8 * fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] - fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)])
+ F8 * (fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)] - F8 * fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
F8 * fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)] - fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)])
- (fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)] - F8 * fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)] +
F8 * fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)] - fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)])
);
} else if ((iF + 1 <= imaxF && iF - 1 >= iminF) &&
(jF + 1 <= jmaxF && jF - 1 >= jminF) &&
(kF + 1 <= kmaxF && kF - 1 >= kminF)) {
fxx[p] = Sdxdx * (
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Sdydy * (
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Sdzdz * (
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
fxy[p] = Sdxdy * (
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
);
fxz[p] = Sdxdz * (
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
);
fyz[p] = Sdydz * (
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
@@ -207,126 +182,5 @@ void fdderivs(const int ex[3],
}
}
if (has4) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fxx[p] = Fdxdx * (
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Fdydy * (
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Fdzdz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
{
const double t_jm2 =
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
const double t_jm1 =
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
const double t_jp1 =
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
const double t_jp2 =
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
}
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
}
}
}
}
// free(fh);
}

View File

@@ -50,8 +50,8 @@ void fderivs(const int ex[3],
const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
static double *fh = NULL;
static size_t cap = 0;
if (fh_size > cap) {
free(fh);
@@ -80,46 +80,48 @@ void fderivs(const int ex[3],
fz[p] = ZEO;
}
/*
* 两段式:
* 1) 先在二阶可用区域计算二阶模板
* 2) 再在高阶可用区域覆盖为四阶模板
*
* 与原 if/elseif 逻辑等价,但减少逐点分支判断。
*/
const int i2_lo = (iminF > 0) ? iminF : 0;
const int j2_lo = (jminF > 0) ? jminF : 0;
const int k2_lo = (kminF > 0) ? kminF : 0;
const int i2_hi = ex1 - 2;
const int j2_hi = ex2 - 2;
const int k2_hi = ex3 - 2;
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
const int i4_hi = ex1 - 3;
const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3;
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// Match Fortran (ghost_width=3, "for bam comparison") exactly:
// only compute when x/y/z all satisfy the same-order stencil at this point.
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
if ((iF + 2 <= imaxF && iF - 2 >= iminF) &&
(jF + 2 <= jmaxF && jF - 2 >= jminF) &&
(kF + 2 <= kmaxF && kF - 2 >= kminF)) {
fx[p] = d12dx * (
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
);
fy[p] = d12dy * (
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
);
fz[p] = d12dz * (
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
);
} else if ((iF + 1 <= imaxF && iF - 1 >= iminF) &&
(jF + 1 <= jmaxF && jF - 1 >= jminF) &&
(kF + 1 <= kmaxF && kF - 1 >= kminF)) {
fx[p] = d2dx * (
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fy[p] = d2dy * (
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fz[p] = d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
@@ -129,39 +131,5 @@ void fderivs(const int ex[3],
}
}
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
const int jF = j0 + 1;
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
fx[p] = d12dx * (
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
);
fy[p] = d12dy * (
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
);
fz[p] = d12dz * (
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
);
}
}
}
}
// free(fh);
}

View File

@@ -43,13 +43,7 @@ void lopsided_kodis(const int ex[3],
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
symmetry_bd(3, ex, f, fh, SoA);
@@ -249,4 +243,6 @@ void lopsided_kodis(const int ex[3],
}
}
}
free(fh);
}

View File

@@ -16,7 +16,7 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
ifeq ($(PGO_MODE),instrument)
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) $(OPENMP_FLAGS)
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
else
@@ -26,7 +26,7 @@ else
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) $(OPENMP_FLAGS)
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
endif

View File

@@ -29,9 +29,6 @@ endif
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
PGO_MODE ?= opt
## OpenMP switch for C/C++ kernels
OPENMP_FLAGS ?= -qopenmp
## Interp_Points load balance profiling mode
## off : (default) no load balance instrumentation
## profile : Pass 1 — instrument Interp_Points to collect timing profile
@@ -49,12 +46,12 @@ endif
## Kernel implementation switch
## 1 (default) : use C++ rewrite of bssn_rhs and helper kernels (faster)
## 0 : fall back to original Fortran kernels
USE_CXX_KERNELS ?= 1
USE_CXX_KERNELS ?= 0
## RK4 kernel implementation switch
## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
## 0 : use original Fortran rungekutta4_rout.o
USE_CXX_RK4 ?= 1
USE_CXX_RK4 ?= 0
f90 = ifx
f77 = ifx

View File

@@ -317,9 +317,9 @@ void scalar_class::Setup_Initial_Data()
#endif
}
}
void scalar_class::Evolve(int Steps)
{
double prev_clock = 0.0, curr_clock = 0.0;
void scalar_class::Evolve(int Steps)
{
clock_t prev_clock, curr_clock;
double LastDump = 0.0, LastCheck = 0.0;
LastAnas = 0;
@@ -327,8 +327,8 @@ void scalar_class::Evolve(int Steps)
for (int ncount = 1; ncount < Steps + 1; ncount++)
{
if (myrank == 0)
curr_clock = MPI_Wtime();
if (myrank == 0)
curr_clock = clock();
RecursiveStep(0);
LastDump += dT_mon;
@@ -343,13 +343,13 @@ void scalar_class::Evolve(int Steps)
#endif
LastDump = 0;
}
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = MPI_Wtime();
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime
<< " Computer used " << (curr_clock - prev_clock) << " seconds! " << endl;
}
if (myrank == 0)
{
prev_clock = curr_clock;
curr_clock = clock();
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime
<< " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl;
}
if (PhysTime >= TotalTime)
break;
}

View File

@@ -9,7 +9,6 @@
import AMSS_NCKU_Input as input_data
import os
import subprocess
import time
@@ -55,44 +54,6 @@ NUMACTL_CPU_BIND = get_last_n_cores_per_socket(n=32)
BUILD_JOBS = 64
def build_abe_runtime_env():
"""Inject OpenMP runtime settings only for the main ABE evolution run."""
runtime_env = os.environ.copy()
omp_threads = max(1, int(getattr(input_data, "OMP_Threads", 1)))
runtime_env["OMP_NUM_THREADS"] = str(omp_threads)
return runtime_env
def build_twopuncture_runtime_env():
"""Let TwoPunctureABE use the runtime default instead of the ABE OMP override."""
runtime_env = os.environ.copy()
runtime_env.pop("OMP_NUM_THREADS", None)
runtime_env.pop("OMP_THREAD_LIMIT", None)
return runtime_env
def build_mpi_launch_args():
"""Build optional host-distribution arguments for mpirun."""
hosts = list(getattr(input_data, "MPI_hosts", []))
ppn = int(getattr(input_data, "MPI_processes_per_node", 0))
if not hosts:
return ""
if ppn > 0:
expected = len(hosts) * ppn
if int(input_data.MPI_processes) != expected:
raise ValueError(
f"MPI_processes={input_data.MPI_processes} does not match "
f"len(MPI_hosts) * MPI_processes_per_node = {expected}"
)
launch_args = f"-hosts {','.join(hosts)}"
if ppn > 0:
launch_args += f" -ppn {ppn}"
return launch_args
##################################################################
@@ -182,38 +143,19 @@ def run_ABE():
print( )
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
print( )
print( f" MPI processes = {input_data.MPI_processes}, OMP threads per process = {max(1, int(getattr(input_data, 'OMP_Threads', 1)))}" )
if getattr(input_data, "MPI_hosts", []):
print( f" MPI hosts = {getattr(input_data, 'MPI_hosts', [])}, MPI ranks per node = {int(getattr(input_data, 'MPI_processes_per_node', 0))}" )
print( " Multi-node runs require the working directory to be visible on all MPI hosts. " )
print( )
## Define the command to run; cast other values to strings as needed
mpi_launch_args = build_mpi_launch_args()
if (input_data.GPU_Calculation == "no"):
mpi_command = NUMACTL_CPU_BIND + " mpirun "
if mpi_launch_args:
mpi_command += mpi_launch_args + " "
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABE"
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
mpi_command_outfile = "ABE_out.log"
elif (input_data.GPU_Calculation == "yes"):
mpi_command = NUMACTL_CPU_BIND + " mpirun "
if mpi_launch_args:
mpi_command += mpi_launch_args + " "
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABEGPU"
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
mpi_command_outfile = "ABEGPU_out.log"
## Execute the MPI command and stream output
mpi_process = subprocess.Popen(
mpi_command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
env=build_abe_runtime_env(),
)
mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
## Write ABE run output to file while printing to stdout
with open(mpi_command_outfile, 'w') as file0:
@@ -253,14 +195,7 @@ def run_TwoPunctureABE():
TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
## Execute the command with subprocess.Popen and stream output
TwoPuncture_process = subprocess.Popen(
TwoPuncture_command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
env=build_twopuncture_runtime_env(),
)
TwoPuncture_process = subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
## Write TwoPunctureABE run output to file while printing to stdout
with open(TwoPuncture_command_outfile, 'w') as file0:

View File

@@ -65,14 +65,10 @@ def print_input_data( File_directory ):
print( "------------------------------------------------------------------------------------------" )
print( )
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation " )
print( )
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes )
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads )
if getattr(input_data, "MPI_hosts", []):
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts )
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node )
print( )
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation " )
print( )
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes )
print( )
print( " The form of computational equation = ", input_data.Equation_Class )
print( " The initial data in this simulation = ", input_data.Initial_Data_Method )
print( )
@@ -144,14 +140,10 @@ def print_input_data( File_directory ):
file0 = open(filepath, 'w')
print( file=file0 )
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation ", file=file0 )
print( file=file0 )
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 )
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads, file=file0 )
if getattr(input_data, "MPI_hosts", []):
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts, file=file0 )
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node, file=file0 )
print( file=file0 )
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation ", file=file0 )
print( file=file0 )
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 )
print( file=file0 )
print( " The form of computational equation = ", input_data.Equation_Class, file=file0 )
print( " The initial data in this simulation = ", input_data.Initial_Data_Method, file=file0 )
print( file=file0 )