Compare commits

..

3 Commits

Author SHA1 Message Date
9c44d1c885 fix(bssn_rhs) 2026-03-03 16:00:45 +08:00
4b9de28feb 将 Restrict/Prolong 链路里的 coarse-level Sync_cached 改为可选(默认跳过)
OutBdLow2Hi_cached 读的是 coarse owned 区域(非 coarse ghost/buffer)
回退旧行为:编译时定义 RP_SYNC_COARSE_AFTER_RESTRICT=1
2026-03-03 14:25:27 +08:00
4eb5dc4ddb 删除重复的一次 chi 一阶导计算 2026-03-03 14:23:56 +08:00
13 changed files with 596 additions and 746 deletions

View File

@@ -13,17 +13,14 @@ import numpy
## Setting MPI processes and the output file directory ## Setting MPI processes and the output file directory
File_directory = "GW150914" ## output file directory File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long ## The file directory name should not be too long
MPI_processes = 64 ## number of mpi processes used in the simulation MPI_processes = 64 ## number of mpi processes used in the simulation
OMP_Threads = 3 ## number of OpenMP threads used by each MPI process
MPI_hosts = ["localhost", "192.168.20.102"] ## MPI hosts for multi-node runs GPU_Calculation = "no" ## Use GPU or not
MPI_processes_per_node = 32 ## MPI ranks launched on each node in MPI_hosts ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
CPU_Part = 1.0
GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
CPU_Part = 1.0
GPU_Part = 0.0 GPU_Part = 0.0
################################################# #################################################
@@ -53,7 +50,7 @@ Check_Time = 100.0
Dump_Time = 100.0 ## time inteval dT for dumping binary data Dump_Time = 100.0 ## time inteval dT for dumping binary data
D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT' D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT'
Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT" Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT"
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number
Courant_Factor = 0.5 ## Courant Factor Courant_Factor = 0.5 ## Courant Factor
Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength

View File

@@ -36,18 +36,12 @@ using namespace std;
#include "myglobal.h" #include "myglobal.h"
#endif #endif
#include "perf.h" #include "perf.h"
#include "derivatives.h" #include "derivatives.h"
#include "ricci_gamma.h" #include "ricci_gamma.h"
// Compile-time switch for per-timestep memory usage collection/printing. //================================================================================================
// Default is OFF to reduce overhead in production runs.
#ifndef BSSN_ENABLE_MEM_USAGE_LOG
#define BSSN_ENABLE_MEM_USAGE_LOG 0
#endif
//================================================================================================
// define bssn_class // define bssn_class
@@ -2034,9 +2028,9 @@ void bssn_class::Read_Ansorg()
//================================================================================================ //================================================================================================
void bssn_class::Evolve(int Steps) void bssn_class::Evolve(int Steps)
{ {
double prev_clock = 0.0, curr_clock = 0.0; clock_t prev_clock, curr_clock;
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0; double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
LastAnas = 0; LastAnas = 0;
#if 0 #if 0
@@ -2135,24 +2129,22 @@ void bssn_class::Evolve(int Steps)
#endif #endif
*/ */
#if BSSN_ENABLE_MEM_USAGE_LOG perf bssn_perf;
perf bssn_perf; size_t current_min, current_avg, current_max, peak_min, peak_avg, peak_max;
size_t current_min, current_avg, current_max, peak_min, peak_avg, peak_max;
#endif
for (int lev = 0; lev < GH->levels; lev++) for (int lev = 0; lev < GH->levels; lev++)
GH->Lt[lev] = PhysTime; GH->Lt[lev] = PhysTime;
GH->settrfls(trfls); GH->settrfls(trfls);
for (int ncount = 1; ncount < Steps + 1; ncount++) for (int ncount = 1; ncount < Steps + 1; ncount++)
{ {
// special for large mass ratio consideration // special for large mass ratio consideration
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6) // if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
// { GH->levels=GH->movls; } // { GH->levels=GH->movls; }
if (myrank == 0) if (myrank == 0)
curr_clock = MPI_Wtime(); curr_clock = clock();
#if (PSTR == 0) #if (PSTR == 0)
RecursiveStep(0); RecursiveStep(0);
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) #elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
@@ -2205,16 +2197,16 @@ void bssn_class::Evolve(int Steps)
} }
} }
if (myrank == 0) if (myrank == 0)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = MPI_Wtime(); curr_clock = clock();
cout << endl; cout << endl;
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " " cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
<< " Computer used " << (curr_clock - prev_clock) << " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl; << " seconds! " << endl;
// cout << endl; // cout << endl;
} }
if (PhysTime >= TotalTime) if (PhysTime >= TotalTime)
break; break;
@@ -2232,23 +2224,21 @@ void bssn_class::Evolve(int Steps)
// fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor); // fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor);
#endif #endif
#if BSSN_ENABLE_MEM_USAGE_LOG // Retrieve memory usage information used during computation; master process prints it
// Retrieve memory usage information used during computation; master process prints it bssn_perf.MemoryUsage(&current_min, &current_avg, &current_max,
bssn_perf.MemoryUsage(&current_min, &current_avg, &current_max, &peak_min, &peak_avg, &peak_max, nprocs);
&peak_min, &peak_avg, &peak_max, nprocs); if (myrank == 0)
if (myrank == 0) {
{ printf(" Memory usage: current %0.4lg/%0.4lg/%0.4lgMB, "
printf(" Memory usage: current %0.4lg/%0.4lg/%0.4lgMB, " "peak %0.4lg/%0.4lg/%0.4lgMB\n",
"peak %0.4lg/%0.4lg/%0.4lgMB\n", (double)current_min / (1024.0 * 1024.0),
(double)current_min / (1024.0 * 1024.0), (double)current_avg / (1024.0 * 1024.0),
(double)current_avg / (1024.0 * 1024.0), (double)current_max / (1024.0 * 1024.0),
(double)current_max / (1024.0 * 1024.0), (double)peak_min / (1024.0 * 1024.0),
(double)peak_min / (1024.0 * 1024.0), (double)peak_avg / (1024.0 * 1024.0),
(double)peak_avg / (1024.0 * 1024.0), (double)peak_max / (1024.0 * 1024.0));
(double)peak_max / (1024.0 * 1024.0)); cout << endl;
cout << endl; }
}
#endif
// Output puncture positions at each step // Output puncture positions at each step
if (myrank == 0) if (myrank == 0)
@@ -5755,11 +5745,17 @@ void bssn_class::SHStep()
//================================================================================================ //================================================================================================
// 0: do not use mixing two levels data for OutBD; 1: do use // 0: do not use mixing two levels data for OutBD; 1: do use
#define MIXOUTB 0 #define MIXOUTB 0
void bssn_class::RestrictProlong(int lev, int YN, bool BB, // In the cached Restrict->OutBdLow2Hi path, coarse Sync is usually redundant:
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL) // OutBdLow2Hi_cached reads coarse owned cells (build_owned_gsl type-4), not coarse ghost/buffer cells.
// Keep a switch to restore the old behavior if needed for debugging.
#ifndef RP_SYNC_COARSE_AFTER_RESTRICT
#define RP_SYNC_COARSE_AFTER_RESTRICT 0
#endif
void bssn_class::RestrictProlong(int lev, int YN, bool BB,
MyList<var> *SL, MyList<var> *OL, MyList<var> *corL)
// we assume // we assume
// StateList 1 ----------- // StateList 1 -----------
// //
@@ -5821,7 +5817,9 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); // misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif #endif
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#if (PSTR == 1 || PSTR == 2) #if (PSTR == 1 || PSTR == 2)
// a_stream.clear(); // a_stream.clear();
@@ -5872,7 +5870,9 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); // misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif #endif
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]); #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#if (PSTR == 1 || PSTR == 2) #if (PSTR == 1 || PSTR == 2)
// a_stream.clear(); // a_stream.clear();
@@ -5958,7 +5958,9 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
#endif #endif
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
@@ -5980,7 +5982,9 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
#endif #endif
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]); #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
@@ -6045,7 +6049,9 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
#endif #endif
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]); #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
@@ -6069,7 +6075,9 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
#endif #endif
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
@@ -6093,7 +6101,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
//================================================================================================ //================================================================================================
void bssn_class::ProlongRestrict(int lev, int YN, bool BB) void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
{ {
if (lev > 0) if (lev > 0)
{ {
@@ -6146,15 +6154,18 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
#else #else
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry); Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
#endif #endif
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); #if (RP_SYNC_COARSE_AFTER_RESTRICT == 1)
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
#endif
} }
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
} }
} }
#undef MIXOUTB #undef MIXOUTB
#undef RP_SYNC_COARSE_AFTER_RESTRICT
//================================================================================================
//================================================================================================

View File

@@ -1891,7 +1891,7 @@ void bssn_class::Read_Ansorg()
void bssn_class::Evolve(int Steps) void bssn_class::Evolve(int Steps)
{ {
double prev_clock = 0.0, curr_clock = 0.0; clock_t prev_clock, curr_clock;
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0; double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
LastAnas = 0; LastAnas = 0;
#if 0 #if 0
@@ -2035,12 +2035,10 @@ void bssn_class::Evolve(int Steps)
GH->settrfls(trfls); GH->settrfls(trfls);
for (int ncount = 1; ncount < Steps + 1; ncount++) for (int ncount = 1; ncount < Steps + 1; ncount++)
{ {
if (myrank == 0) cout << "Before Step: " << ncount << " My Rank: " << myrank
curr_clock = MPI_Wtime(); << " takes " << MPI_Wtime() - beg_time << " seconds!" << endl;
cout << "Before Step: " << ncount << " My Rank: " << myrank
<< " takes " << MPI_Wtime() - beg_time << " seconds!" << endl;
beg_time = MPI_Wtime(); beg_time = MPI_Wtime();
#if (PSTR == 0) #if (PSTR == 0)
RecursiveStep(0); RecursiveStep(0);
@@ -2097,10 +2095,10 @@ void bssn_class::Evolve(int Steps)
if (myrank == 0) if (myrank == 0)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = MPI_Wtime(); curr_clock = clock();
cout << "Timestep # " << ncount << ": integrating to time: " << PhysTime << endl; cout << "Timestep # " << ncount << ": integrating to time: " << PhysTime << endl;
cout << "used " << (curr_clock - prev_clock) << " seconds!" << endl; cout << "used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" << endl;
} }
if (PhysTime >= TotalTime) if (PhysTime >= TotalTime)

View File

@@ -59,10 +59,9 @@
real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz
real*8,intent(in) :: eps real*8,intent(in) :: eps
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: ham_Res, movx_Res, movy_Res, movz_Res real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: ham_Res, movx_Res, movy_Res, movz_Res
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res
! gont = 0: success; gont = 1: something wrong ! gont = 0: success; gont = 1: something wrong
integer::gont integer::gont
integer :: i,j,k
!~~~~~~> Other variables: !~~~~~~> Other variables:
@@ -84,18 +83,11 @@
real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
real*8 :: dX, dY, dZ, PI real*8 :: dX, dY, dZ, PI
real*8 :: divb_loc,det_loc real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
real*8 :: gupxx_loc,gupxy_loc,gupxz_loc,gupyy_loc,gupyz_loc,gupzz_loc real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
real*8 :: Rxx_loc,Rxy_loc,Rxz_loc,Ryy_loc,Ryz_loc,Rzz_loc real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
real*8 :: fxx_loc,fxy_loc,fxz_loc
real*8 :: Gamxa_loc,Gamya_loc,Gamza_loc
real*8 :: f_loc,chin_loc
real*8 :: l_fxx,l_fxy,l_fxz,l_fyy,l_fyz,l_fzz,S_loc
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
double precision,parameter::FF = 0.75d0,eta=2.d0 double precision,parameter::FF = 0.75d0,eta=2.d0
real*8, parameter :: F1o3 = 1.D0/3.D0, F2o3 = 2.D0/3.D0,F3o2=1.5d0, F1o6 = 1.D0/6.D0 real*8, parameter :: F1o3 = 1.D0/3.D0, F2o3 = 2.D0/3.D0,F3o2=1.5d0, F1o6 = 1.D0/6.D0
real*8, parameter :: F16=1.6d1,F8=8.d0 real*8, parameter :: F16=1.6d1,F8=8.d0
@@ -104,11 +96,11 @@
real*8, dimension(ex(1),ex(2),ex(3)) :: reta real*8, dimension(ex(1),ex(2),ex(3)) :: reta
#endif #endif
#if (GAUGE == 6 || GAUGE == 7) #if (GAUGE == 6 || GAUGE == 7)
integer :: BHN integer :: BHN,i,j,k
real*8, dimension(9) :: Porg real*8, dimension(9) :: Porg
real*8, dimension(3) :: Mass real*8, dimension(3) :: Mass
real*8 :: r1,r2,M,A,w1,w2,C1,C2 real*8 :: r1,r2,M,A,w1,w2,C1,C2
real*8, dimension(ex(1),ex(2),ex(3)) :: reta real*8, dimension(ex(1),ex(2),ex(3)) :: reta
call getpbh(BHN,Porg,Mass) call getpbh(BHN,Porg,Mass)
@@ -153,204 +145,174 @@
dY = Y(2) - Y(1) dY = Y(2) - Y(1)
dZ = Z(2) - Z(1) dZ = Z(2) - Z(1)
do k=1,ex(3) alpn1 = Lap + ONE
do j=1,ex(2) chin1 = chi + ONE
do i=1,ex(1) gxx = dxx + ONE
alpn1(i,j,k) = Lap(i,j,k) + ONE gyy = dyy + ONE
chin1(i,j,k) = chi(i,j,k) + ONE gzz = dzz + ONE
gxx(i,j,k) = dxx(i,j,k) + ONE
gyy(i,j,k) = dyy(i,j,k) + ONE
gzz(i,j,k) = dzz(i,j,k) + ONE
enddo
enddo
enddo
call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev) call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)
call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev) call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)
call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev) call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev) div_beta = betaxx + betayy + betazz
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev) call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev) call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
do k=1,ex(3) call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
do j=1,ex(2)
do i=1,ex(1) gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
divb_loc = betaxx(i,j,k) + betayy(i,j,k) + betazz(i,j,k) TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
div_beta(i,j,k) = divb_loc
gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
chi_rhs(i,j,k) = F2o3 * chin1(i,j,k) * (alpn1(i,j,k) * trK(i,j,k) - divb_loc) TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
gxx_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axx(i,j,k) - F2o3 * gxx(i,j,k) * divb_loc + & gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
TWO * ( gxx(i,j,k) * betaxx(i,j,k) + gxy(i,j,k) * betayx(i,j,k) + gxz(i,j,k) * betazx(i,j,k) ) TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
gyy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayy(i,j,k) - F2o3 * gyy(i,j,k) * divb_loc + & gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
TWO * ( gxy(i,j,k) * betaxy(i,j,k) + gyy(i,j,k) * betayy(i,j,k) + gyz(i,j,k) * betazy(i,j,k) ) gxx * betaxy + gxz * betazy + &
gyy * betayx + gyz * betazx &
gzz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Azz(i,j,k) - F2o3 * gzz(i,j,k) * divb_loc + & - gxy * betazz
TWO * ( gxz(i,j,k) * betaxz(i,j,k) + gyz(i,j,k) * betayz(i,j,k) + gzz(i,j,k) * betazz(i,j,k) )
gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
gxy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axy(i,j,k) + F1o3 * gxy(i,j,k) * divb_loc + & gxy * betaxz + gyy * betayz + &
gxx(i,j,k) * betaxy(i,j,k) + gxz(i,j,k) * betazy(i,j,k) + gyy(i,j,k) * betayx(i,j,k) + & gxz * betaxy + gzz * betazy &
gyz(i,j,k) * betazx(i,j,k) - gxy(i,j,k) * betazz(i,j,k) - gyz * betaxx
gyz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayz(i,j,k) + F1o3 * gyz(i,j,k) * divb_loc + & gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
gxy(i,j,k) * betaxz(i,j,k) + gyy(i,j,k) * betayz(i,j,k) + gxz(i,j,k) * betaxy(i,j,k) + & gxx * betaxz + gxy * betayz + &
gzz(i,j,k) * betazy(i,j,k) - gyz(i,j,k) * betaxx(i,j,k) gyz * betayx + gzz * betazx &
- gxz * betayy !rhs for gij
gxz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axz(i,j,k) + F1o3 * gxz(i,j,k) * divb_loc + &
gxx(i,j,k) * betaxz(i,j,k) + gxy(i,j,k) * betayz(i,j,k) + gyz(i,j,k) * betayx(i,j,k) + & ! invert tilted metric
gzz(i,j,k) * betazx(i,j,k) - gxz(i,j,k) * betayy(i,j,k) gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
det_loc = gxx(i,j,k) * gyy(i,j,k) * gzz(i,j,k) + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) + & gupxx = ( gyy * gzz - gyz * gyz ) / gupzz
gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) - gxz(i,j,k) * gyy(i,j,k) * gxz(i,j,k) - & gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
gxy(i,j,k) * gxy(i,j,k) * gzz(i,j,k) - gxx(i,j,k) * gyz(i,j,k) * gyz(i,j,k) gupxz = ( gxy * gyz - gyy * gxz ) / gupzz
gupxx_loc = ( gyy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gyz(i,j,k) ) / det_loc gupyy = ( gxx * gzz - gxz * gxz ) / gupzz
gupxy_loc = - ( gxy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gxz(i,j,k) ) / det_loc gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
gupxz_loc = ( gxy(i,j,k) * gyz(i,j,k) - gyy(i,j,k) * gxz(i,j,k) ) / det_loc gupzz = ( gxx * gyy - gxy * gxy ) / gupzz
gupyy_loc = ( gxx(i,j,k) * gzz(i,j,k) - gxz(i,j,k) * gxz(i,j,k) ) / det_loc
gupyz_loc = - ( gxx(i,j,k) * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / det_loc if(co == 0)then
gupzz_loc = ( gxx(i,j,k) * gyy(i,j,k) - gxy(i,j,k) * gxy(i,j,k) ) / det_loc ! Gam^i_Res = Gam^i + gup^ij_,j
gupxx(i,j,k) = gupxx_loc Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)&
gupxy(i,j,k) = gupxy_loc +gupxy*(gupxx*gxyx+gupxy*gyyx+gupxz*gyzx)&
gupxz(i,j,k) = gupxz_loc +gupxz*(gupxx*gxzx+gupxy*gyzx+gupxz*gzzx)&
gupyy(i,j,k) = gupyy_loc +gupxx*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
gupyz(i,j,k) = gupyz_loc +gupxy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
gupzz(i,j,k) = gupzz_loc +gupxz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
+gupxx*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
if(co == 0)then +gupxy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
Gmx_Res(i,j,k) = Gamx(i,j,k) - ( & +gupxz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
gupxx_loc*(gupxx_loc*gxxx(i,j,k)+gupxy_loc*gxyx(i,j,k)+gupxz_loc*gxzx(i,j,k)) + & Gmy_Res = Gamy - (gupxx*(gupxy*gxxx+gupyy*gxyx+gupyz*gxzx)&
gupxy_loc*(gupxx_loc*gxyx(i,j,k)+gupxy_loc*gyyx(i,j,k)+gupxz_loc*gyzx(i,j,k)) + & +gupxy*(gupxy*gxyx+gupyy*gyyx+gupyz*gyzx)&
gupxz_loc*(gupxx_loc*gxzx(i,j,k)+gupxy_loc*gyzx(i,j,k)+gupxz_loc*gzzx(i,j,k)) + & +gupxz*(gupxy*gxzx+gupyy*gyzx+gupyz*gzzx)&
gupxx_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + & +gupxy*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
gupxy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + & +gupyy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
gupxz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + & +gupyz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
gupxx_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + & +gupxy*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
gupxy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + & +gupyy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
gupxz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k))) +gupyz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
Gmy_Res(i,j,k) = Gamy(i,j,k) - ( & Gmz_Res = Gamz - (gupxx*(gupxz*gxxx+gupyz*gxyx+gupzz*gxzx)&
gupxx_loc*(gupxy_loc*gxxx(i,j,k)+gupyy_loc*gxyx(i,j,k)+gupyz_loc*gxzx(i,j,k)) + & +gupxy*(gupxz*gxyx+gupyz*gyyx+gupzz*gyzx)&
gupxy_loc*(gupxy_loc*gxyx(i,j,k)+gupyy_loc*gyyx(i,j,k)+gupyz_loc*gyzx(i,j,k)) + & +gupxz*(gupxz*gxzx+gupyz*gyzx+gupzz*gzzx)&
gupxz_loc*(gupxy_loc*gxzx(i,j,k)+gupyy_loc*gyzx(i,j,k)+gupyz_loc*gzzx(i,j,k)) + & +gupxy*(gupxz*gxxy+gupyz*gxyy+gupzz*gxzy)&
gupxy_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + & +gupyy*(gupxz*gxyy+gupyz*gyyy+gupzz*gyzy)&
gupyy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + & +gupyz*(gupxz*gxzy+gupyz*gyzy+gupzz*gzzy)&
gupyz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + & +gupxz*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
gupxy_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + & +gupyz*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
gupyy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + & +gupzz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
gupyz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k))) endif
Gmz_Res(i,j,k) = Gamz(i,j,k) - ( &
gupxx_loc*(gupxz_loc*gxxx(i,j,k)+gupyz_loc*gxyx(i,j,k)+gupzz_loc*gxzx(i,j,k)) + & ! second kind of connection
gupxy_loc*(gupxz_loc*gxyx(i,j,k)+gupyz_loc*gyyx(i,j,k)+gupzz_loc*gyzx(i,j,k)) + & Gamxxx =HALF*( gupxx*gxxx + gupxy*(TWO*gxyx - gxxy ) + gupxz*(TWO*gxzx - gxxz ))
gupxz_loc*(gupxz_loc*gxzx(i,j,k)+gupyz_loc*gyzx(i,j,k)+gupzz_loc*gzzx(i,j,k)) + & Gamyxx =HALF*( gupxy*gxxx + gupyy*(TWO*gxyx - gxxy ) + gupyz*(TWO*gxzx - gxxz ))
gupxy_loc*(gupxz_loc*gxxy(i,j,k)+gupyz_loc*gxyy(i,j,k)+gupzz_loc*gxzy(i,j,k)) + & Gamzxx =HALF*( gupxz*gxxx + gupyz*(TWO*gxyx - gxxy ) + gupzz*(TWO*gxzx - gxxz ))
gupyy_loc*(gupxz_loc*gxyy(i,j,k)+gupyz_loc*gyyy(i,j,k)+gupzz_loc*gyzy(i,j,k)) + &
gupyz_loc*(gupxz_loc*gxzy(i,j,k)+gupyz_loc*gyzy(i,j,k)+gupzz_loc*gzzy(i,j,k)) + & Gamxyy =HALF*( gupxx*(TWO*gxyy - gyyx ) + gupxy*gyyy + gupxz*(TWO*gyzy - gyyz ))
gupxz_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + & Gamyyy =HALF*( gupxy*(TWO*gxyy - gyyx ) + gupyy*gyyy + gupyz*(TWO*gyzy - gyyz ))
gupyz_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + & Gamzyy =HALF*( gupxz*(TWO*gxyy - gyyx ) + gupyz*gyyy + gupzz*(TWO*gyzy - gyyz ))
gupzz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
endif Gamxzz =HALF*( gupxx*(TWO*gxzz - gzzx ) + gupxy*(TWO*gyzz - gzzy ) + gupxz*gzzz)
Gamyzz =HALF*( gupxy*(TWO*gxzz - gzzx ) + gupyy*(TWO*gyzz - gzzy ) + gupyz*gzzz)
Gamxxx(i,j,k)=HALF*( gupxx_loc*gxxx(i,j,k) + gupxy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupxz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamzzz =HALF*( gupxz*(TWO*gxzz - gzzx ) + gupyz*(TWO*gyzz - gzzy ) + gupzz*gzzz)
Gamyxx(i,j,k)=HALF*( gupxy_loc*gxxx(i,j,k) + gupyy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupyz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k)))
Gamzxx(i,j,k)=HALF*( gupxz_loc*gxxx(i,j,k) + gupyz_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupzz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamxxy =HALF*( gupxx*gxxy + gupxy*gyyx + gupxz*( gxzy + gyzx - gxyz ) )
Gamyxy =HALF*( gupxy*gxxy + gupyy*gyyx + gupyz*( gxzy + gyzx - gxyz ) )
Gamxyy(i,j,k)=HALF*( gupxx_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupxy_loc*gyyy(i,j,k) + gupxz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamzxy =HALF*( gupxz*gxxy + gupyz*gyyx + gupzz*( gxzy + gyzx - gxyz ) )
Gamyyy(i,j,k)=HALF*( gupxy_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyy_loc*gyyy(i,j,k) + gupyz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k)))
Gamzyy(i,j,k)=HALF*( gupxz_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyz_loc*gyyy(i,j,k) + gupzz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamxxz =HALF*( gupxx*gxxz + gupxy*( gxyz + gyzx - gxzy ) + gupxz*gzzx )
Gamyxz =HALF*( gupxy*gxxz + gupyy*( gxyz + gyzx - gxzy ) + gupyz*gzzx )
Gamxzz(i,j,k)=HALF*( gupxx_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupxy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupxz_loc*gzzz(i,j,k)) Gamzxz =HALF*( gupxz*gxxz + gupyz*( gxyz + gyzx - gxzy ) + gupzz*gzzx )
Gamyzz(i,j,k)=HALF*( gupxy_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupyz_loc*gzzz(i,j,k))
Gamzzz(i,j,k)=HALF*( gupxz_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyz_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupzz_loc*gzzz(i,j,k)) Gamxyz =HALF*( gupxx*( gxyz + gxzy - gyzx ) + gupxy*gyyz + gupxz*gzzy )
Gamyyz =HALF*( gupxy*( gxyz + gxzy - gyzx ) + gupyy*gyyz + gupyz*gzzy )
Gamxxy(i,j,k)=HALF*( gupxx_loc*gxxy(i,j,k) + gupxy_loc*gyyx(i,j,k) + gupxz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) Gamzyz =HALF*( gupxz*( gxyz + gxzy - gyzx ) + gupyz*gyyz + gupzz*gzzy )
Gamyxy(i,j,k)=HALF*( gupxy_loc*gxxy(i,j,k) + gupyy_loc*gyyx(i,j,k) + gupyz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) ! Raise indices of \tilde A_{ij} and store in R_ij
Gamzxy(i,j,k)=HALF*( gupxz_loc*gxxy(i,j,k) + gupyz_loc*gyyx(i,j,k) + gupzz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) )
Rxx = gupxx * gupxx * Axx + gupxy * gupxy * Ayy + gupxz * gupxz * Azz + &
Gamxxz(i,j,k)=HALF*( gupxx_loc*gxxz(i,j,k) + gupxy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupxz_loc*gzzx(i,j,k) ) TWO*(gupxx * gupxy * Axy + gupxx * gupxz * Axz + gupxy * gupxz * Ayz)
Gamyxz(i,j,k)=HALF*( gupxy_loc*gxxz(i,j,k) + gupyy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupyz_loc*gzzx(i,j,k) )
Gamzxz(i,j,k)=HALF*( gupxz_loc*gxxz(i,j,k) + gupyz_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupzz_loc*gzzx(i,j,k) ) Ryy = gupxy * gupxy * Axx + gupyy * gupyy * Ayy + gupyz * gupyz * Azz + &
TWO*(gupxy * gupyy * Axy + gupxy * gupyz * Axz + gupyy * gupyz * Ayz)
Gamxyz(i,j,k)=HALF*( gupxx_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupxy_loc*gyyz(i,j,k) + gupxz_loc*gzzy(i,j,k) )
Gamyyz(i,j,k)=HALF*( gupxy_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyy_loc*gyyz(i,j,k) + gupyz_loc*gzzy(i,j,k) ) Rzz = gupxz * gupxz * Axx + gupyz * gupyz * Ayy + gupzz * gupzz * Azz + &
Gamzyz(i,j,k)=HALF*( gupxz_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyz_loc*gyyz(i,j,k) + gupzz_loc*gzzy(i,j,k) ) TWO*(gupxz * gupyz * Axy + gupxz * gupzz * Axz + gupyz * gupzz * Ayz)
enddo
enddo Rxy = gupxx * gupxy * Axx + gupxy * gupyy * Ayy + gupxz * gupyz * Azz + &
enddo (gupxx * gupyy + gupxy * gupxy)* Axy + &
! Raise indices of \tilde A_{ij} and store in R_ij (gupxx * gupyz + gupxz * gupxy)* Axz + &
(gupxy * gupyz + gupxz * gupyy)* Ayz
! Right hand side for Gam^i without shift terms...
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev) Rxz = gupxx * gupxz * Axx + gupxy * gupyz * Ayy + gupxz * gupzz * Azz + &
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev) (gupxx * gupyz + gupxy * gupxz)* Axy + &
do k=1,ex(3) (gupxx * gupzz + gupxz * gupxz)* Axz + &
do j=1,ex(2) (gupxy * gupzz + gupxz * gupyz)* Ayz
do i=1,ex(1)
gupxx_loc = gupxx(i,j,k) Ryz = gupxy * gupxz * Axx + gupyy * gupyz * Ayy + gupyz * gupzz * Azz + &
gupxy_loc = gupxy(i,j,k) (gupxy * gupyz + gupyy * gupxz)* Axy + &
gupxz_loc = gupxz(i,j,k) (gupxy * gupzz + gupyz * gupxz)* Axz + &
gupyy_loc = gupyy(i,j,k) (gupyy * gupzz + gupyz * gupyz)* Ayz
gupyz_loc = gupyz(i,j,k)
gupzz_loc = gupzz(i,j,k) ! Right hand side for Gam^i without shift terms...
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
Rxx_loc = gupxx_loc * gupxx_loc * Axx(i,j,k) + gupxy_loc * gupxy_loc * Ayy(i,j,k) + gupxz_loc * gupxz_loc * Azz(i,j,k) + & call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
TWO * (gupxx_loc * gupxy_loc * Axy(i,j,k) + gupxx_loc * gupxz_loc * Axz(i,j,k) + gupxy_loc * gupxz_loc * Ayz(i,j,k))
Ryy_loc = gupxy_loc * gupxy_loc * Axx(i,j,k) + gupyy_loc * gupyy_loc * Ayy(i,j,k) + gupyz_loc * gupyz_loc * Azz(i,j,k) + & Gamx_rhs = - TWO * ( Lapx * Rxx + Lapy * Rxy + Lapz * Rxz ) + &
TWO * (gupxy_loc * gupyy_loc * Axy(i,j,k) + gupxy_loc * gupyz_loc * Axz(i,j,k) + gupyy_loc * gupyz_loc * Ayz(i,j,k)) TWO * alpn1 * ( &
Rzz_loc = gupxz_loc * gupxz_loc * Axx(i,j,k) + gupyz_loc * gupyz_loc * Ayy(i,j,k) + gupzz_loc * gupzz_loc * Azz(i,j,k) + & -F3o2/chin1 * ( chix * Rxx + chiy * Rxy + chiz * Rxz ) - &
TWO * (gupxz_loc * gupyz_loc * Axy(i,j,k) + gupxz_loc * gupzz_loc * Axz(i,j,k) + gupyz_loc * gupzz_loc * Ayz(i,j,k)) gupxx * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
Rxy_loc = gupxx_loc * gupxy_loc * Axx(i,j,k) + gupxy_loc * gupyy_loc * Ayy(i,j,k) + gupxz_loc * gupyz_loc * Azz(i,j,k) + & gupxy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
(gupxx_loc * gupyy_loc + gupxy_loc * gupxy_loc) * Axy(i,j,k) + & gupxz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
(gupxx_loc * gupyz_loc + gupxz_loc * gupxy_loc) * Axz(i,j,k) + & Gamxxx * Rxx + Gamxyy * Ryy + Gamxzz * Rzz + &
(gupxy_loc * gupyz_loc + gupxz_loc * gupyy_loc) * Ayz(i,j,k) TWO * ( Gamxxy * Rxy + Gamxxz * Rxz + Gamxyz * Ryz ) )
Rxz_loc = gupxx_loc * gupxz_loc * Axx(i,j,k) + gupxy_loc * gupyz_loc * Ayy(i,j,k) + gupxz_loc * gupzz_loc * Azz(i,j,k) + &
(gupxx_loc * gupyz_loc + gupxy_loc * gupxz_loc) * Axy(i,j,k) + & Gamy_rhs = - TWO * ( Lapx * Rxy + Lapy * Ryy + Lapz * Ryz ) + &
(gupxx_loc * gupzz_loc + gupxz_loc * gupxz_loc) * Axz(i,j,k) + & TWO * alpn1 * ( &
(gupxy_loc * gupzz_loc + gupxz_loc * gupyz_loc) * Ayz(i,j,k) -F3o2/chin1 * ( chix * Rxy + chiy * Ryy + chiz * Ryz ) - &
Ryz_loc = gupxy_loc * gupxz_loc * Axx(i,j,k) + gupyy_loc * gupyz_loc * Ayy(i,j,k) + gupyz_loc * gupzz_loc * Azz(i,j,k) + & gupxy * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
(gupxy_loc * gupyz_loc + gupyy_loc * gupxz_loc) * Axy(i,j,k) + & gupyy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
(gupxy_loc * gupzz_loc + gupyz_loc * gupxz_loc) * Axz(i,j,k) + & gupyz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
(gupyy_loc * gupzz_loc + gupyz_loc * gupyz_loc) * Ayz(i,j,k) Gamyxx * Rxx + Gamyyy * Ryy + Gamyzz * Rzz + &
Rxx(i,j,k) = Rxx_loc TWO * ( Gamyxy * Rxy + Gamyxz * Rxz + Gamyyz * Ryz ) )
Ryy(i,j,k) = Ryy_loc
Rzz(i,j,k) = Rzz_loc Gamz_rhs = - TWO * ( Lapx * Rxz + Lapy * Ryz + Lapz * Rzz ) + &
Rxy(i,j,k) = Rxy_loc TWO * alpn1 * ( &
Rxz(i,j,k) = Rxz_loc -F3o2/chin1 * ( chix * Rxz + chiy * Ryz + chiz * Rzz ) - &
Ryz(i,j,k) = Ryz_loc gupxz * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
gupyz * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
Gamx_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxx_loc + Lapy(i,j,k) * Rxy_loc + Lapz(i,j,k) * Rxz_loc) + & gupzz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
TWO * alpn1(i,j,k) * ( & Gamzxx * Rxx + Gamzyy * Ryy + Gamzzz * Rzz + &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxx_loc + chiy(i,j,k) * Rxy_loc + chiz(i,j,k) * Rxz_loc) - & TWO * ( Gamzxy * Rxy + Gamzxz * Rxz + Gamzyz * Ryz ) )
gupxx_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
gupxy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
gupxz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
Gamxxx(i,j,k) * Rxx_loc + Gamxyy(i,j,k) * Ryy_loc + Gamxzz(i,j,k) * Rzz_loc + &
TWO * (Gamxxy(i,j,k) * Rxy_loc + Gamxxz(i,j,k) * Rxz_loc + Gamxyz(i,j,k) * Ryz_loc))
Gamy_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxy_loc + Lapy(i,j,k) * Ryy_loc + Lapz(i,j,k) * Ryz_loc) + &
TWO * alpn1(i,j,k) * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxy_loc + chiy(i,j,k) * Ryy_loc + chiz(i,j,k) * Ryz_loc) - &
gupxy_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
gupyy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
gupyz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
Gamyxx(i,j,k) * Rxx_loc + Gamyyy(i,j,k) * Ryy_loc + Gamyzz(i,j,k) * Rzz_loc + &
TWO * (Gamyxy(i,j,k) * Rxy_loc + Gamyxz(i,j,k) * Rxz_loc + Gamyyz(i,j,k) * Ryz_loc))
Gamz_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxz_loc + Lapy(i,j,k) * Ryz_loc + Lapz(i,j,k) * Rzz_loc) + &
TWO * alpn1(i,j,k) * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxz_loc + chiy(i,j,k) * Ryz_loc + chiz(i,j,k) * Rzz_loc) - &
gupxz_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
gupyz_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
gupzz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
Gamzxx(i,j,k) * Rxx_loc + Gamzyy(i,j,k) * Ryy_loc + Gamzzz(i,j,k) * Rzz_loc + &
TWO * (Gamzxy(i,j,k) * Rxy_loc + Gamzxz(i,j,k) * Rxz_loc + Gamzyz(i,j,k) * Ryz_loc))
enddo
enddo
enddo
call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,& call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev) X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)
@@ -359,54 +321,38 @@
call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,& call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev) X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev) fxx = gxxx + gxyy + gxzz
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev) fxy = gxyx + gyyy + gyzz
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev) fxz = gxzx + gyzy + gzzz
do k=1,ex(3)
do j=1,ex(2) Gamxa = gupxx * Gamxxx + gupyy * Gamxyy + gupzz * Gamxzz + &
do i=1,ex(1) TWO*( gupxy * Gamxxy + gupxz * Gamxxz + gupyz * Gamxyz )
divb_loc = div_beta(i,j,k) Gamya = gupxx * Gamyxx + gupyy * Gamyyy + gupzz * Gamyzz + &
fxx_loc = gxxx(i,j,k) + gxyy(i,j,k) + gxzz(i,j,k) TWO*( gupxy * Gamyxy + gupxz * Gamyxz + gupyz * Gamyyz )
fxy_loc = gxyx(i,j,k) + gyyy(i,j,k) + gyzz(i,j,k) Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz + &
fxz_loc = gxzx(i,j,k) + gyzy(i,j,k) + gzzz(i,j,k) TWO*( gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz )
gupxx_loc = gupxx(i,j,k) call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
gupxy_loc = gupxy(i,j,k) call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
gupxz_loc = gupxz(i,j,k) call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
gupyy_loc = gupyy(i,j,k)
gupyz_loc = gupyz(i,j,k) Gamx_rhs = Gamx_rhs + F2o3 * Gamxa * div_beta - &
gupzz_loc = gupzz(i,j,k) Gamxa * betaxx - Gamya * betaxy - Gamza * betaxz + &
F1o3 * (gupxx * fxx + gupxy * fxy + gupxz * fxz ) + &
Gamxa_loc = gupxx_loc * Gamxxx(i,j,k) + gupyy_loc * Gamxyy(i,j,k) + gupzz_loc * Gamxzz(i,j,k) + & gupxx * gxxx + gupyy * gyyx + gupzz * gzzx + &
TWO * (gupxy_loc * Gamxxy(i,j,k) + gupxz_loc * Gamxxz(i,j,k) + gupyz_loc * Gamxyz(i,j,k)) TWO * (gupxy * gxyx + gupxz * gxzx + gupyz * gyzx )
Gamya_loc = gupxx_loc * Gamyxx(i,j,k) + gupyy_loc * Gamyyy(i,j,k) + gupzz_loc * Gamyzz(i,j,k) + &
TWO * (gupxy_loc * Gamyxy(i,j,k) + gupxz_loc * Gamyxz(i,j,k) + gupyz_loc * Gamyyz(i,j,k)) Gamy_rhs = Gamy_rhs + F2o3 * Gamya * div_beta - &
Gamza_loc = gupxx_loc * Gamzxx(i,j,k) + gupyy_loc * Gamzyy(i,j,k) + gupzz_loc * Gamzzz(i,j,k) + & Gamxa * betayx - Gamya * betayy - Gamza * betayz + &
TWO * (gupxy_loc * Gamzxy(i,j,k) + gupxz_loc * Gamzxz(i,j,k) + gupyz_loc * Gamzyz(i,j,k)) F1o3 * (gupxy * fxx + gupyy * fxy + gupyz * fxz ) + &
Gamxa(i,j,k) = Gamxa_loc gupxx * gxxy + gupyy * gyyy + gupzz * gzzy + &
Gamya(i,j,k) = Gamya_loc TWO * (gupxy * gxyy + gupxz * gxzy + gupyz * gyzy )
Gamza(i,j,k) = Gamza_loc
Gamz_rhs = Gamz_rhs + F2o3 * Gamza * div_beta - &
Gamx_rhs(i,j,k) = Gamx_rhs(i,j,k) + F2o3 * Gamxa_loc * divb_loc - & Gamxa * betazx - Gamya * betazy - Gamza * betazz + &
Gamxa_loc * betaxx(i,j,k) - Gamya_loc * betaxy(i,j,k) - Gamza_loc * betaxz(i,j,k) + & F1o3 * (gupxz * fxx + gupyz * fxy + gupzz * fxz ) + &
F1o3 * (gupxx_loc * fxx_loc + gupxy_loc * fxy_loc + gupxz_loc * fxz_loc) + & gupxx * gxxz + gupyy * gyyz + gupzz * gzzz + &
gupxx_loc * gxxx(i,j,k) + gupyy_loc * gyyx(i,j,k) + gupzz_loc * gzzx(i,j,k) + & TWO * (gupxy * gxyz + gupxz * gxzz + gupyz * gyzz ) !rhs for Gam^i
TWO * (gupxy_loc * gxyx(i,j,k) + gupxz_loc * gxzx(i,j,k) + gupyz_loc * gyzx(i,j,k))
Gamy_rhs(i,j,k) = Gamy_rhs(i,j,k) + F2o3 * Gamya_loc * divb_loc - &
Gamxa_loc * betayx(i,j,k) - Gamya_loc * betayy(i,j,k) - Gamza_loc * betayz(i,j,k) + &
F1o3 * (gupxy_loc * fxx_loc + gupyy_loc * fxy_loc + gupyz_loc * fxz_loc) + &
gupxx_loc * gxxy(i,j,k) + gupyy_loc * gyyy(i,j,k) + gupzz_loc * gzzy(i,j,k) + &
TWO * (gupxy_loc * gxyy(i,j,k) + gupxz_loc * gxzy(i,j,k) + gupyz_loc * gyzy(i,j,k))
Gamz_rhs(i,j,k) = Gamz_rhs(i,j,k) + F2o3 * Gamza_loc * divb_loc - &
Gamxa_loc * betazx(i,j,k) - Gamya_loc * betazy(i,j,k) - Gamza_loc * betazz(i,j,k) + &
F1o3 * (gupxz_loc * fxx_loc + gupyz_loc * fxy_loc + gupzz_loc * fxz_loc) + &
gupxx_loc * gxxz(i,j,k) + gupyy_loc * gyyz(i,j,k) + gupzz_loc * gzzz(i,j,k) + &
TWO * (gupxy_loc * gxyz(i,j,k) + gupxz_loc * gxzz(i,j,k) + gupyz_loc * gyzz(i,j,k))
enddo
enddo
enddo
!first kind of connection stored in gij,k !first kind of connection stored in gij,k
gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx
@@ -655,190 +601,192 @@
Gamxyz * gxzz + Gamyyz * gyzz + Gamzyz * gzzz + & Gamxyz * gxzz + Gamyyz * gyzz + Gamzyz * gzzz + &
Gamxzz * gxzy + Gamyzz * gyzy + Gamzzz * gzzy + & Gamxzz * gxzy + Gamyzz * gyzy + Gamzzz * gzzy + &
Gamxyz * gzzx + Gamyyz * gzzy + Gamzyz * gzzz ) Gamxyz * gzzx + Gamyyz * gzzy + Gamzyz * gzzz )
!covariant second derivative of chi respect to tilted metric !covariant second derivative of chi respect to tilted metric
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev) call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
do k=1,ex(3) fxx = fxx - Gamxxx * chix - Gamyxx * chiy - Gamzxx * chiz
do j=1,ex(2) fxy = fxy - Gamxxy * chix - Gamyxy * chiy - Gamzxy * chiz
do i=1,ex(1) fxz = fxz - Gamxxz * chix - Gamyxz * chiy - Gamzxz * chiz
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k) * chix(i,j,k) - Gamyxx(i,j,k) * chiy(i,j,k) - Gamzxx(i,j,k) * chiz(i,j,k) fyy = fyy - Gamxyy * chix - Gamyyy * chiy - Gamzyy * chiz
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k) * chix(i,j,k) - Gamyxy(i,j,k) * chiy(i,j,k) - Gamzxy(i,j,k) * chiz(i,j,k) fyz = fyz - Gamxyz * chix - Gamyyz * chiy - Gamzyz * chiz
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k) * chix(i,j,k) - Gamyxz(i,j,k) * chiy(i,j,k) - Gamzxz(i,j,k) * chiz(i,j,k) fzz = fzz - Gamxzz * chix - Gamyzz * chiy - Gamzzz * chiz
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k) * chix(i,j,k) - Gamyyy(i,j,k) * chiy(i,j,k) - Gamzyy(i,j,k) * chiz(i,j,k) ! Store D^l D_l chi - 3/(2*chi) D^l chi D_l chi in f
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k) * chix(i,j,k) - Gamyyz(i,j,k) * chiy(i,j,k) - Gamzyz(i,j,k) * chiz(i,j,k)
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k) * chix(i,j,k) - Gamyzz(i,j,k) * chiy(i,j,k) - Gamzzz(i,j,k) * chiz(i,j,k) f = gupxx * ( fxx - F3o2/chin1 * chix * chix ) + &
gupyy * ( fyy - F3o2/chin1 * chiy * chiy ) + &
chin_loc = chin1(i,j,k) gupzz * ( fzz - F3o2/chin1 * chiz * chiz ) + &
f_loc = gupxx(i,j,k) * (fxx(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chix(i,j,k)) + & TWO * gupxy * ( fxy - F3o2/chin1 * chix * chiy ) + &
gupyy(i,j,k) * (fyy(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiy(i,j,k)) + & TWO * gupxz * ( fxz - F3o2/chin1 * chix * chiz ) + &
gupzz(i,j,k) * (fzz(i,j,k) - F3o2/chin_loc * chiz(i,j,k) * chiz(i,j,k)) + & TWO * gupyz * ( fyz - F3o2/chin1 * chiy * chiz )
TWO * gupxy(i,j,k) * (fxy(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiy(i,j,k)) + & ! Add chi part to Ricci tensor:
TWO * gupxz(i,j,k) * (fxz(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiz(i,j,k)) + &
TWO * gupyz(i,j,k) * (fyz(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiz(i,j,k)) Rxx = Rxx + (fxx - chix*chix/chin1/TWO + gxx * f)/chin1/TWO
f(i,j,k) = f_loc Ryy = Ryy + (fyy - chiy*chiy/chin1/TWO + gyy * f)/chin1/TWO
Rzz = Rzz + (fzz - chiz*chiz/chin1/TWO + gzz * f)/chin1/TWO
Rxx(i,j,k) = Rxx(i,j,k) + (fxx(i,j,k) - chix(i,j,k)*chix(i,j,k)/chin_loc/TWO + gxx(i,j,k) * f_loc)/chin_loc/TWO Rxy = Rxy + (fxy - chix*chiy/chin1/TWO + gxy * f)/chin1/TWO
Ryy(i,j,k) = Ryy(i,j,k) + (fyy(i,j,k) - chiy(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gyy(i,j,k) * f_loc)/chin_loc/TWO Rxz = Rxz + (fxz - chix*chiz/chin1/TWO + gxz * f)/chin1/TWO
Rzz(i,j,k) = Rzz(i,j,k) + (fzz(i,j,k) - chiz(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gzz(i,j,k) * f_loc)/chin_loc/TWO Ryz = Ryz + (fyz - chiy*chiz/chin1/TWO + gyz * f)/chin1/TWO
Rxy(i,j,k) = Rxy(i,j,k) + (fxy(i,j,k) - chix(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gxy(i,j,k) * f_loc)/chin_loc/TWO
Rxz(i,j,k) = Rxz(i,j,k) + (fxz(i,j,k) - chix(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gxz(i,j,k) * f_loc)/chin_loc/TWO ! covariant second derivatives of the lapse respect to physical metric
Ryz(i,j,k) = Ryz(i,j,k) + (fyz(i,j,k) - chiy(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gyz(i,j,k) * f_loc)/chin_loc/TWO call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
enddo SYM,SYM,SYM,symmetry,Lev)
enddo
enddo gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
! covariant second derivatives of the lapse respect to physical metric gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, & ! now get physical second kind of connection
SYM,SYM,SYM,symmetry,Lev) Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
do k=1,ex(3) Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF
do j=1,ex(2) Gamxyy = Gamxyy - ( - gyy * gxxx )*HALF
do i=1,ex(1) Gamyyy = Gamyyy - ( (chiy + chiy)/chin1 - gyy * gxxy )*HALF
chin_loc = chin1(i,j,k) Gamzyy = Gamzyy - ( - gyy * gxxz )*HALF
gxxx(i,j,k) = (gupxx(i,j,k) * chix(i,j,k) + gupxy(i,j,k) * chiy(i,j,k) + gupxz(i,j,k) * chiz(i,j,k)) / chin_loc Gamxzz = Gamxzz - ( - gzz * gxxx )*HALF
gxxy(i,j,k) = (gupxy(i,j,k) * chix(i,j,k) + gupyy(i,j,k) * chiy(i,j,k) + gupyz(i,j,k) * chiz(i,j,k)) / chin_loc Gamyzz = Gamyzz - ( - gzz * gxxy )*HALF
gxxz(i,j,k) = (gupxz(i,j,k) * chix(i,j,k) + gupyz(i,j,k) * chiy(i,j,k) + gupzz(i,j,k) * chiz(i,j,k)) / chin_loc Gamzzz = Gamzzz - ( (chiz + chiz)/chin1 - gzz * gxxz )*HALF
Gamxxy = Gamxxy - ( chiy /chin1 - gxy * gxxx )*HALF
Gamxxx(i,j,k) = Gamxxx(i,j,k) - ( (chix(i,j,k) + chix(i,j,k))/chin_loc - gxx(i,j,k) * gxxx(i,j,k) )*HALF Gamyxy = Gamyxy - ( chix /chin1 - gxy * gxxy )*HALF
Gamyxx(i,j,k) = Gamyxx(i,j,k) - ( - gxx(i,j,k) * gxxy(i,j,k) )*HALF Gamzxy = Gamzxy - ( - gxy * gxxz )*HALF
Gamzxx(i,j,k) = Gamzxx(i,j,k) - ( - gxx(i,j,k) * gxxz(i,j,k) )*HALF Gamxxz = Gamxxz - ( chiz /chin1 - gxz * gxxx )*HALF
Gamxyy(i,j,k) = Gamxyy(i,j,k) - ( - gyy(i,j,k) * gxxx(i,j,k) )*HALF Gamyxz = Gamyxz - ( - gxz * gxxy )*HALF
Gamyyy(i,j,k) = Gamyyy(i,j,k) - ( (chiy(i,j,k) + chiy(i,j,k))/chin_loc - gyy(i,j,k) * gxxy(i,j,k) )*HALF Gamzxz = Gamzxz - ( chix /chin1 - gxz * gxxz )*HALF
Gamzyy(i,j,k) = Gamzyy(i,j,k) - ( - gyy(i,j,k) * gxxz(i,j,k) )*HALF Gamxyz = Gamxyz - ( - gyz * gxxx )*HALF
Gamxzz(i,j,k) = Gamxzz(i,j,k) - ( - gzz(i,j,k) * gxxx(i,j,k) )*HALF Gamyyz = Gamyyz - ( chiz /chin1 - gyz * gxxy )*HALF
Gamyzz(i,j,k) = Gamyzz(i,j,k) - ( - gzz(i,j,k) * gxxy(i,j,k) )*HALF Gamzyz = Gamzyz - ( chiy /chin1 - gyz * gxxz )*HALF
Gamzzz(i,j,k) = Gamzzz(i,j,k) - ( (chiz(i,j,k) + chiz(i,j,k))/chin_loc - gzz(i,j,k) * gxxz(i,j,k) )*HALF
Gamxxy(i,j,k) = Gamxxy(i,j,k) - ( chiy(i,j,k) /chin_loc - gxy(i,j,k) * gxxx(i,j,k) )*HALF fxx = fxx - Gamxxx*Lapx - Gamyxx*Lapy - Gamzxx*Lapz
Gamyxy(i,j,k) = Gamyxy(i,j,k) - ( chix(i,j,k) /chin_loc - gxy(i,j,k) * gxxy(i,j,k) )*HALF fyy = fyy - Gamxyy*Lapx - Gamyyy*Lapy - Gamzyy*Lapz
Gamzxy(i,j,k) = Gamzxy(i,j,k) - ( - gxy(i,j,k) * gxxz(i,j,k) )*HALF fzz = fzz - Gamxzz*Lapx - Gamyzz*Lapy - Gamzzz*Lapz
Gamxxz(i,j,k) = Gamxxz(i,j,k) - ( chiz(i,j,k) /chin_loc - gxz(i,j,k) * gxxx(i,j,k) )*HALF fxy = fxy - Gamxxy*Lapx - Gamyxy*Lapy - Gamzxy*Lapz
Gamyxz(i,j,k) = Gamyxz(i,j,k) - ( - gxz(i,j,k) * gxxy(i,j,k) )*HALF fxz = fxz - Gamxxz*Lapx - Gamyxz*Lapy - Gamzxz*Lapz
Gamzxz(i,j,k) = Gamzxz(i,j,k) - ( chix(i,j,k) /chin_loc - gxz(i,j,k) * gxxz(i,j,k) )*HALF fyz = fyz - Gamxyz*Lapx - Gamyyz*Lapy - Gamzyz*Lapz
Gamxyz(i,j,k) = Gamxyz(i,j,k) - ( - gyz(i,j,k) * gxxx(i,j,k) )*HALF
Gamyyz(i,j,k) = Gamyyz(i,j,k) - ( chiz(i,j,k) /chin_loc - gyz(i,j,k) * gxxy(i,j,k) )*HALF ! store D^i D_i Lap in trK_rhs upto chi
Gamzyz(i,j,k) = Gamzyz(i,j,k) - ( chiy(i,j,k) /chin_loc - gyz(i,j,k) * gxxz(i,j,k) )*HALF trK_rhs = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz )
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k)*Lapx(i,j,k) - Gamyxx(i,j,k)*Lapy(i,j,k) - Gamzxx(i,j,k)*Lapz(i,j,k) #if 1
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k)*Lapx(i,j,k) - Gamyyy(i,j,k)*Lapy(i,j,k) - Gamzyy(i,j,k)*Lapz(i,j,k) !! follow bam code
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k)*Lapx(i,j,k) - Gamyzz(i,j,k)*Lapy(i,j,k) - Gamzzz(i,j,k)*Lapz(i,j,k) S = chin1 * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k)*Lapx(i,j,k) - Gamyxy(i,j,k)*Lapy(i,j,k) - Gamzxy(i,j,k)*Lapz(i,j,k) TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k)*Lapx(i,j,k) - Gamyxz(i,j,k)*Lapy(i,j,k) - Gamzxz(i,j,k)*Lapz(i,j,k) f = F2o3 * trK * trK -(&
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k)*Lapx(i,j,k) - Gamyyz(i,j,k)*Lapy(i,j,k) - Gamzyz(i,j,k)*Lapz(i,j,k) gupxx * ( &
gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
trK_rhs(i,j,k) = gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz) ) + &
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) gupyy * ( &
enddo gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
enddo TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz) ) + &
enddo gupzz * ( &
do k=1,ex(3) gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
do j=1,ex(2) TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz) ) + &
do i=1,ex(1) TWO * ( &
divb_loc = div_beta(i,j,k) gupxy * ( &
chin_loc = chin1(i,j,k) gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
gupxy * (Axx * Ayy + Axy * Axy) + &
S_loc = chin_loc * ( gupxx(i,j,k) * Sxx(i,j,k) + gupyy(i,j,k) * Syy(i,j,k) + gupzz(i,j,k) * Szz(i,j,k) + & gupxz * (Axx * Ayz + Axz * Axy) + &
TWO * (gupxy(i,j,k) * Sxy(i,j,k) + gupxz(i,j,k) * Sxz(i,j,k) + gupyz(i,j,k) * Syz(i,j,k)) ) gupyz * (Axy * Ayz + Axz * Ayy) ) + &
S(i,j,k) = S_loc gupxz * ( &
gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
f_loc = F2o3 * trK(i,j,k) * trK(i,j,k) - ( & gupxy * (Axx * Ayz + Axy * Axz) + &
gupxx(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + & gupxz * (Axx * Azz + Axz * Axz) + &
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + & gupyz * (Axy * Azz + Axz * Ayz) ) + &
TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + & gupyz * ( &
gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k)) ) + & gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
gupyy(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + & gupxy * (Axy * Ayz + Ayy * Axz) + &
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + & gupxz * (Axy * Azz + Ayz * Axz) + &
TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + & gupyz * (Ayy * Azz + Ayz * Ayz) ) )) -1.6d1*PI*rho + EIGHT * PI * S
gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k)) ) + & f = - F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
gupzz(i,j,k) * ( gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + & TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + alpn1/chin1*f)
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + &
TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + & fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k)) ) + & fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
TWO * ( gupxy(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + & fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + & fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + & fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + & fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k)) ) + & #else
gupxz(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + & ! Add lapse and S_ij parts to Ricci tensor:
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + & fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + & fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k)) ) + & fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
gupyz(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + & fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + & fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + & fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k)) ) ) ) - & ! Compute trace-free part (note: chi^-1 and chi cancel!):
F16 * PI * rho(i,j,k) + EIGHT * PI * S_loc
f = F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
f_loc = -F1o3 * ( gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) )
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + & #endif
alpn1(i,j,k)/chin_loc * f_loc )
f(i,j,k) = f_loc Axx_rhs = fxx - gxx * f
Ayy_rhs = fyy - gyy * f
l_fxx = alpn1(i,j,k) * (Rxx(i,j,k) - EIGHT * PI * Sxx(i,j,k)) - fxx(i,j,k) Azz_rhs = fzz - gzz * f
l_fxy = alpn1(i,j,k) * (Rxy(i,j,k) - EIGHT * PI * Sxy(i,j,k)) - fxy(i,j,k) Axy_rhs = fxy - gxy * f
l_fxz = alpn1(i,j,k) * (Rxz(i,j,k) - EIGHT * PI * Sxz(i,j,k)) - fxz(i,j,k) Axz_rhs = fxz - gxz * f
l_fyy = alpn1(i,j,k) * (Ryy(i,j,k) - EIGHT * PI * Syy(i,j,k)) - fyy(i,j,k) Ayz_rhs = fyz - gyz * f
l_fyz = alpn1(i,j,k) * (Ryz(i,j,k) - EIGHT * PI * Syz(i,j,k)) - fyz(i,j,k)
l_fzz = alpn1(i,j,k) * (Rzz(i,j,k) - EIGHT * PI * Szz(i,j,k)) - fzz(i,j,k) ! Now: store A_il A^l_j into fij:
Axx_rhs(i,j,k) = l_fxx - gxx(i,j,k) * f_loc fxx = gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
Ayy_rhs(i,j,k) = l_fyy - gyy(i,j,k) * f_loc TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz)
Azz_rhs(i,j,k) = l_fzz - gzz(i,j,k) * f_loc fyy = gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
Axy_rhs(i,j,k) = l_fxy - gxy(i,j,k) * f_loc TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz)
Axz_rhs(i,j,k) = l_fxz - gxz(i,j,k) * f_loc fzz = gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
Ayz_rhs(i,j,k) = l_fyz - gyz(i,j,k) * f_loc TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz)
fxy = gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
fxx(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + & gupxy *(Axx * Ayy + Axy * Axy) + &
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + & gupxz *(Axx * Ayz + Axz * Axy) + &
gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k)) gupyz *(Axy * Ayz + Axz * Ayy)
fyy(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + & fxz = gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + & gupxy *(Axx * Ayz + Axy * Axz) + &
gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k)) gupxz *(Axx * Azz + Axz * Axz) + &
fzz(i,j,k) = gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + & gupyz *(Axy * Azz + Axz * Ayz)
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + & fyz = gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k)) gupxy *(Axy * Ayz + Ayy * Axz) + &
fxy(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + & gupxz *(Axy * Azz + Ayz * Axz) + &
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + & gupyz *(Ayy * Azz + Ayz * Ayz)
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k)) f = chin1
fxz(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + & ! store D^i D_i Lap in trK_rhs
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + & trK_rhs = f*trK_rhs
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k)) Axx_rhs = f * Axx_rhs+ alpn1 * (trK * Axx - TWO * fxx) + &
fyz(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + & TWO * ( Axx * betaxx + Axy * betayx + Axz * betazx )- &
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + & F2o3 * Axx * div_beta
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k)) Ayy_rhs = f * Ayy_rhs+ alpn1 * (trK * Ayy - TWO * fyy) + &
TWO * ( Axy * betaxy + Ayy * betayy + Ayz * betazy )- &
trK_rhs(i,j,k) = chin_loc * trK_rhs(i,j,k) F2o3 * Ayy * div_beta
Axx_rhs(i,j,k) = chin_loc * Axx_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axx(i,j,k) - TWO * fxx(i,j,k)) + & Azz_rhs = f * Azz_rhs+ alpn1 * (trK * Azz - TWO * fzz) + &
TWO * (Axx(i,j,k) * betaxx(i,j,k) + Axy(i,j,k) * betayx(i,j,k) + Axz(i,j,k) * betazx(i,j,k)) - & TWO * ( Axz * betaxz + Ayz * betayz + Azz * betazz )- &
F2o3 * Axx(i,j,k) * divb_loc F2o3 * Azz * div_beta
Ayy_rhs(i,j,k) = chin_loc * Ayy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayy(i,j,k) - TWO * fyy(i,j,k)) + &
TWO * (Axy(i,j,k) * betaxy(i,j,k) + Ayy(i,j,k) * betayy(i,j,k) + Ayz(i,j,k) * betazy(i,j,k)) - & Axy_rhs = f * Axy_rhs+ alpn1 *( trK * Axy - TWO * fxy )+ &
F2o3 * Ayy(i,j,k) * divb_loc Axx * betaxy + Axz * betazy + &
Azz_rhs(i,j,k) = chin_loc * Azz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Azz(i,j,k) - TWO * fzz(i,j,k)) + & Ayy * betayx + Ayz * betazx + &
TWO * (Axz(i,j,k) * betaxz(i,j,k) + Ayz(i,j,k) * betayz(i,j,k) + Azz(i,j,k) * betazz(i,j,k)) - & F1o3 * Axy * div_beta - Axy * betazz
F2o3 * Azz(i,j,k) * divb_loc
Axy_rhs(i,j,k) = chin_loc * Axy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axy(i,j,k) - TWO * fxy(i,j,k)) + & Ayz_rhs = f * Ayz_rhs+ alpn1 *( trK * Ayz - TWO * fyz )+ &
Axx(i,j,k) * betaxy(i,j,k) + Axz(i,j,k) * betazy(i,j,k) + Ayy(i,j,k) * betayx(i,j,k) + & Axy * betaxz + Ayy * betayz + &
Ayz(i,j,k) * betazx(i,j,k) + F1o3 * Axy(i,j,k) * divb_loc - Axy(i,j,k) * betazz(i,j,k) Axz * betaxy + Azz * betazy + &
Ayz_rhs(i,j,k) = chin_loc * Ayz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayz(i,j,k) - TWO * fyz(i,j,k)) + & F1o3 * Ayz * div_beta - Ayz * betaxx
Axy(i,j,k) * betaxz(i,j,k) + Ayy(i,j,k) * betayz(i,j,k) + Axz(i,j,k) * betaxy(i,j,k) + &
Azz(i,j,k) * betazy(i,j,k) + F1o3 * Ayz(i,j,k) * divb_loc - Ayz(i,j,k) * betaxx(i,j,k) Axz_rhs = f * Axz_rhs+ alpn1 *( trK * Axz - TWO * fxz )+ &
Axz_rhs(i,j,k) = chin_loc * Axz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axz(i,j,k) - TWO * fxz(i,j,k)) + & Axx * betaxz + Axy * betayz + &
Axx(i,j,k) * betaxz(i,j,k) + Axy(i,j,k) * betayz(i,j,k) + Ayz(i,j,k) * betayx(i,j,k) + & Ayz * betayx + Azz * betazx + &
Azz(i,j,k) * betazx(i,j,k) + F1o3 * Axz(i,j,k) * divb_loc - Axz(i,j,k) * betayy(i,j,k) F1o3 * Axz * div_beta - Axz * betayy !rhs for Aij
trK_rhs(i,j,k) = - trK_rhs(i,j,k) + alpn1(i,j,k) * ( F1o3 * trK(i,j,k) * trK(i,j,k) + & ! Compute trace of S_ij
gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + &
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + & S = f * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
FOUR * PI * (rho(i,j,k) + S_loc) ) TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
enddo
enddo trK_rhs = - trK_rhs + alpn1 *( F1o3 * trK * trK + &
enddo gupxx * fxx + gupyy * fyy + gupzz * fzz + &
TWO * ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + &
FOUR * PI * ( rho + S )) !rhs for trK
!!!! gauge variable part !!!! gauge variable part
@@ -1000,15 +948,15 @@
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency) !!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
! lopsided_kodis shares the symmetry_bd buffer between advection and ! lopsided_kodis shares the symmetry_bd buffer between advection and
! dissipation, eliminating redundant full-grid copies. For metric variables ! dissipation, eliminating redundant full-grid copies. For metric variables
! gxx/gyy/gzz (=dxx/dyy/dzz+1): stencil coefficients sum to zero, ! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero,
! so the constant offset has no effect on dissipation. ! so the constant offset has no effect on dissipation.
call lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps) call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps) call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
call lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps) call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
call lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps) call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)

View File

@@ -2,24 +2,6 @@
#include "bssn_rhs.h" #include "bssn_rhs.h"
#include "share_func.h" #include "share_func.h"
#include "tool.h" #include "tool.h"
#ifdef _OPENMP
#define BSSN_OMP_TASK_GROUP_BEGIN \
_Pragma("omp parallel") \
{ \
_Pragma("omp single nowait") \
{
#define BSSN_OMP_TASK_CALL(...) \
_Pragma("omp task") { __VA_ARGS__; }
#define BSSN_OMP_TASK_GROUP_END \
_Pragma("omp taskwait") \
} \
}
#else
#define BSSN_OMP_TASK_GROUP_BEGIN {
#define BSSN_OMP_TASK_CALL(...) { __VA_ARGS__; }
#define BSSN_OMP_TASK_GROUP_END }
#endif
// 0-based i,j,k // 0-based i,j,k
// #define IDX_F(i,j,k,nx,ny) ((i) + (j)*(nx) + (k)*(nx)*(ny)) // #define IDX_F(i,j,k,nx,ny) ((i) + (j)*(nx) + (k)*(nx)*(ny))
// ex(1)=nx, ex(2)=ny, ex(3)=nz // ex(1)=nx, ex(2)=ny, ex(3)=nz
@@ -126,20 +108,18 @@ int f_compute_rhs_bssn(int *ex, double &T,
chin1[i] = chi[i] + 1.0; chin1[i] = chi[i] + 1.0;
} }
// 9ms // // 9ms //
BSSN_OMP_TASK_GROUP_BEGIN fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)) fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)) fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)) fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)) fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)) fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)) fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)) fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)) fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)) fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)) fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)) fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev))
BSSN_OMP_TASK_GROUP_END
// 3ms // // 3ms //
for(int i=0;i<all;i+=1){ for(int i=0;i<all;i+=1){
@@ -336,14 +316,15 @@ int f_compute_rhs_bssn(int *ex, double &T,
); );
} }
// 22.3ms // // 22.3ms //
BSSN_OMP_TASK_GROUP_BEGIN fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,
BSSN_OMP_TASK_CALL(fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)) X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)) fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,
BSSN_OMP_TASK_CALL(fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)) X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)) fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,
BSSN_OMP_TASK_CALL(fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)) X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)) fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_GROUP_END fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev);
// Fused: fxx/Gamxa + Gamma_rhs part 2 (2 loops -> 1) // Fused: fxx/Gamxa + Gamma_rhs part 2 (2 loops -> 1)
for(int i=0;i<all;i+=1){ for(int i=0;i<all;i+=1){
@@ -735,7 +716,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
// 24ms // // 24ms //
fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev); fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
// 6ms // // 6ms //
for (int i=0;i<all;i+=1) { for (int i=0;i<all;i+=1) {
@@ -1033,12 +1013,12 @@ int f_compute_rhs_bssn(int *ex, double &T,
betaz_rhs[i] = FF * dtSfz[i]; betaz_rhs[i] = FF * dtSfz[i];
reta[i] = reta[i] =
gupxx[i] * dtSfx_rhs[i] * dtSfx_rhs[i] gupxx[i] * chix[i] * chix[i]
+ gupyy[i] * dtSfy_rhs[i] * dtSfy_rhs[i] + gupyy[i] * chiy[i] * chiy[i]
+ gupzz[i] * dtSfz_rhs[i] * dtSfz_rhs[i] + gupzz[i] * chiz[i] * chiz[i]
+ TWO * ( gupxy[i] * dtSfx_rhs[i] * dtSfy_rhs[i] + TWO * ( gupxy[i] * chix[i] * chiy[i]
+ gupxz[i] * dtSfx_rhs[i] * dtSfz_rhs[i] + gupxz[i] * chix[i] * chiz[i]
+ gupyz[i] * dtSfy_rhs[i] * dtSfz_rhs[i] ); + gupyz[i] * chiy[i] * chiz[i] );
#if (GAUGE == 2) #if (GAUGE == 2)
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 ); reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 );
@@ -1051,12 +1031,12 @@ int f_compute_rhs_bssn(int *ex, double &T,
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i]; dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
#elif (GAUGE == 4 || GAUGE == 5) #elif (GAUGE == 4 || GAUGE == 5)
reta[i] = reta[i] =
gupxx[i] * dtSfx_rhs[i] * dtSfx_rhs[i] gupxx[i] * chix[i] * chix[i]
+ gupyy[i] * dtSfy_rhs[i] * dtSfy_rhs[i] + gupyy[i] * chiy[i] * chiy[i]
+ gupzz[i] * dtSfz_rhs[i] * dtSfz_rhs[i] + gupzz[i] * chiz[i] * chiz[i]
+ TWO * ( gupxy[i] * dtSfx_rhs[i] * dtSfy_rhs[i] + TWO * ( gupxy[i] * chix[i] * chiy[i]
+ gupxz[i] * dtSfx_rhs[i] * dtSfz_rhs[i] + gupxz[i] * chix[i] * chiz[i]
+ gupyz[i] * dtSfy_rhs[i] * dtSfz_rhs[i] ); + gupyz[i] * chiy[i] * chiz[i] );
#if (GAUGE == 4) #if (GAUGE == 4)
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 ); reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 );
@@ -1082,32 +1062,30 @@ int f_compute_rhs_bssn(int *ex, double &T,
#endif #endif
} }
// advection + KO dissipation with shared symmetry buffer // advection + KO dissipation with shared symmetry buffer
BSSN_OMP_TASK_GROUP_BEGIN lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps)) lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)) lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)) lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps)) lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps)) lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)) lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps)) lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps)) lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps)) lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)) lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps)) lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps)) lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps)) lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps)) lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps))
BSSN_OMP_TASK_GROUP_END
// 2ms // // 2ms //
if(co==0){ if(co==0){
for (int i=0;i<all;i+=1) { for (int i=0;i<all;i+=1) {
@@ -1154,14 +1132,12 @@ int f_compute_rhs_bssn(int *ex, double &T,
} }
// 1ms // // 1ms //
BSSN_OMP_TASK_GROUP_BEGIN fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)) fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0)) fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0)) fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)) fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0)) fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0))
BSSN_OMP_TASK_GROUP_END
// 7ms // // 7ms //
for (int i=0;i<all;i+=1) { for (int i=0;i<all;i+=1) {
gxxx[i] = gxxx[i] - ( Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i] gxxx[i] = gxxx[i] - ( Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]

View File

@@ -41,8 +41,8 @@ void fdderivs(const int ex[3],
const size_t nz = (size_t)ex3 + 2; const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz; const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL; static double *fh = NULL;
static thread_local size_t cap = 0; static size_t cap = 0;
if (fh_size > cap) { if (fh_size > cap) {
free(fh); free(fh);

View File

@@ -50,8 +50,8 @@ void fderivs(const int ex[3],
const size_t ny = (size_t)ex2 + 2; const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2; const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz; const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL; static double *fh = NULL;
static thread_local size_t cap = 0; static size_t cap = 0;
if (fh_size > cap) { if (fh_size > cap) {
free(fh); free(fh);

View File

@@ -43,13 +43,7 @@ void lopsided_kodis(const int ex[3],
const size_t nz = (size_t)ex3 + 3; const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz; const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL; double *fh = (double*)malloc(fh_size * sizeof(double));
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
if (!fh) return; if (!fh) return;
symmetry_bd(3, ex, f, fh, SoA); symmetry_bd(3, ex, f, fh, SoA);
@@ -249,4 +243,6 @@ void lopsided_kodis(const int ex[3],
} }
} }
} }
free(fh);
} }

View File

@@ -16,7 +16,7 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
ifeq ($(PGO_MODE),instrument) ifeq ($(PGO_MODE),instrument)
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability ## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \ CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) $(OPENMP_FLAGS) -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \ f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
else else
@@ -26,7 +26,7 @@ else
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) $(OPENMP_FLAGS) -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
endif endif
@@ -64,8 +64,8 @@ lopsided_c.o: lopsided_c.C
lopsided_kodis_c.o: lopsided_kodis_c.C lopsided_kodis_c.o: lopsided_kodis_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS ## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata

View File

@@ -29,9 +29,6 @@ endif
## instrument : PGO Phase 1 instrumentation to collect fresh profile data ## instrument : PGO Phase 1 instrumentation to collect fresh profile data
PGO_MODE ?= opt PGO_MODE ?= opt
## OpenMP switch for C/C++ kernels
OPENMP_FLAGS ?= -qopenmp
## Interp_Points load balance profiling mode ## Interp_Points load balance profiling mode
## off : (default) no load balance instrumentation ## off : (default) no load balance instrumentation
## profile : Pass 1 — instrument Interp_Points to collect timing profile ## profile : Pass 1 — instrument Interp_Points to collect timing profile

View File

@@ -317,9 +317,9 @@ void scalar_class::Setup_Initial_Data()
#endif #endif
} }
} }
void scalar_class::Evolve(int Steps) void scalar_class::Evolve(int Steps)
{ {
double prev_clock = 0.0, curr_clock = 0.0; clock_t prev_clock, curr_clock;
double LastDump = 0.0, LastCheck = 0.0; double LastDump = 0.0, LastCheck = 0.0;
LastAnas = 0; LastAnas = 0;
@@ -327,8 +327,8 @@ void scalar_class::Evolve(int Steps)
for (int ncount = 1; ncount < Steps + 1; ncount++) for (int ncount = 1; ncount < Steps + 1; ncount++)
{ {
if (myrank == 0) if (myrank == 0)
curr_clock = MPI_Wtime(); curr_clock = clock();
RecursiveStep(0); RecursiveStep(0);
LastDump += dT_mon; LastDump += dT_mon;
@@ -343,13 +343,13 @@ void scalar_class::Evolve(int Steps)
#endif #endif
LastDump = 0; LastDump = 0;
} }
if (myrank == 0) if (myrank == 0)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = MPI_Wtime(); curr_clock = clock();
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime
<< " Computer used " << (curr_clock - prev_clock) << " seconds! " << endl; << " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl;
} }
if (PhysTime >= TotalTime) if (PhysTime >= TotalTime)
break; break;
} }

View File

@@ -9,7 +9,6 @@
import AMSS_NCKU_Input as input_data import AMSS_NCKU_Input as input_data
import os
import subprocess import subprocess
import time import time
@@ -55,44 +54,6 @@ NUMACTL_CPU_BIND = get_last_n_cores_per_socket(n=32)
BUILD_JOBS = 64 BUILD_JOBS = 64
def build_abe_runtime_env():
"""Inject OpenMP runtime settings only for the main ABE evolution run."""
runtime_env = os.environ.copy()
omp_threads = max(1, int(getattr(input_data, "OMP_Threads", 1)))
runtime_env["OMP_NUM_THREADS"] = str(omp_threads)
return runtime_env
def build_twopuncture_runtime_env():
"""Let TwoPunctureABE use the runtime default instead of the ABE OMP override."""
runtime_env = os.environ.copy()
runtime_env.pop("OMP_NUM_THREADS", None)
runtime_env.pop("OMP_THREAD_LIMIT", None)
return runtime_env
def build_mpi_launch_args():
"""Build optional host-distribution arguments for mpirun."""
hosts = list(getattr(input_data, "MPI_hosts", []))
ppn = int(getattr(input_data, "MPI_processes_per_node", 0))
if not hosts:
return ""
if ppn > 0:
expected = len(hosts) * ppn
if int(input_data.MPI_processes) != expected:
raise ValueError(
f"MPI_processes={input_data.MPI_processes} does not match "
f"len(MPI_hosts) * MPI_processes_per_node = {expected}"
)
launch_args = f"-hosts {','.join(hosts)}"
if ppn > 0:
launch_args += f" -ppn {ppn}"
return launch_args
################################################################## ##################################################################
@@ -182,38 +143,19 @@ def run_ABE():
print( ) print( )
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " ) print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
print( ) print( )
print( f" MPI processes = {input_data.MPI_processes}, OMP threads per process = {max(1, int(getattr(input_data, 'OMP_Threads', 1)))}" )
if getattr(input_data, "MPI_hosts", []):
print( f" MPI hosts = {getattr(input_data, 'MPI_hosts', [])}, MPI ranks per node = {int(getattr(input_data, 'MPI_processes_per_node', 0))}" )
print( " Multi-node runs require the working directory to be visible on all MPI hosts. " )
print( )
## Define the command to run; cast other values to strings as needed ## Define the command to run; cast other values to strings as needed
mpi_launch_args = build_mpi_launch_args()
if (input_data.GPU_Calculation == "no"): if (input_data.GPU_Calculation == "no"):
mpi_command = NUMACTL_CPU_BIND + " mpirun " mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
if mpi_launch_args:
mpi_command += mpi_launch_args + " "
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABE"
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE" #mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
mpi_command_outfile = "ABE_out.log" mpi_command_outfile = "ABE_out.log"
elif (input_data.GPU_Calculation == "yes"): elif (input_data.GPU_Calculation == "yes"):
mpi_command = NUMACTL_CPU_BIND + " mpirun " mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
if mpi_launch_args:
mpi_command += mpi_launch_args + " "
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABEGPU"
mpi_command_outfile = "ABEGPU_out.log" mpi_command_outfile = "ABEGPU_out.log"
## Execute the MPI command and stream output ## Execute the MPI command and stream output
mpi_process = subprocess.Popen( mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
mpi_command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
env=build_abe_runtime_env(),
)
## Write ABE run output to file while printing to stdout ## Write ABE run output to file while printing to stdout
with open(mpi_command_outfile, 'w') as file0: with open(mpi_command_outfile, 'w') as file0:
@@ -253,14 +195,7 @@ def run_TwoPunctureABE():
TwoPuncture_command_outfile = "TwoPunctureABE_out.log" TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
## Execute the command with subprocess.Popen and stream output ## Execute the command with subprocess.Popen and stream output
TwoPuncture_process = subprocess.Popen( TwoPuncture_process = subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
TwoPuncture_command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
env=build_twopuncture_runtime_env(),
)
## Write TwoPunctureABE run output to file while printing to stdout ## Write TwoPunctureABE run output to file while printing to stdout
with open(TwoPuncture_command_outfile, 'w') as file0: with open(TwoPuncture_command_outfile, 'w') as file0:

View File

@@ -65,14 +65,10 @@ def print_input_data( File_directory ):
print( "------------------------------------------------------------------------------------------" ) print( "------------------------------------------------------------------------------------------" )
print( ) print( )
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation " ) print( " Printing the basic parameter and setting in the AMSS-NCKU simulation " )
print( ) print( )
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes ) print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes )
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads ) print( )
if getattr(input_data, "MPI_hosts", []):
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts )
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node )
print( )
print( " The form of computational equation = ", input_data.Equation_Class ) print( " The form of computational equation = ", input_data.Equation_Class )
print( " The initial data in this simulation = ", input_data.Initial_Data_Method ) print( " The initial data in this simulation = ", input_data.Initial_Data_Method )
print( ) print( )
@@ -144,14 +140,10 @@ def print_input_data( File_directory ):
file0 = open(filepath, 'w') file0 = open(filepath, 'w')
print( file=file0 ) print( file=file0 )
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation ", file=file0 ) print( " Printing the basic parameter and setting in the AMSS-NCKU simulation ", file=file0 )
print( file=file0 ) print( file=file0 )
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 ) print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 )
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads, file=file0 ) print( file=file0 )
if getattr(input_data, "MPI_hosts", []):
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts, file=file0 )
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node, file=file0 )
print( file=file0 )
print( " The form of computational equation = ", input_data.Equation_Class, file=file0 ) print( " The form of computational equation = ", input_data.Equation_Class, file=file0 )
print( " The initial data in this simulation = ", input_data.Initial_Data_Method, file=file0 ) print( " The initial data in this simulation = ", input_data.Initial_Data_Method, file=file0 )
print( file=file0 ) print( file=file0 )