Remove OpenMP from C rewrite kernel
The C rewrite introduced OpenMP parallelism. Remove all OpenMP.
This commit is contained in:
@@ -34,7 +34,6 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
int &Symmetry, int &Lev, double &eps, int &co
|
||||
) // return gont
|
||||
{
|
||||
double t0 = omp_get_wtime();
|
||||
int nx = ex[0], ny = ex[1], nz=ex[2];
|
||||
int all = nx*ny*nz;
|
||||
// printf("nx=%d ny=%d nz=%d all=%d\n", nx, ny, nz, all);
|
||||
@@ -80,15 +79,8 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
dY = Y[1] - Y[0];
|
||||
dZ = Z[1] - Z[0];
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
int tid = omp_get_thread_num(); // 当前线程号(从 0 开始)
|
||||
int nthr = omp_get_num_threads(); // 当前并行区里的总线程数
|
||||
int local = all / nthr;
|
||||
int start = tid * local;
|
||||
int end = (tid == nthr - 1) ? all : start + local;
|
||||
// 1ms //
|
||||
for(int i=start;i<end;i+=1){
|
||||
for(int i=0;i<all;i+=1){
|
||||
alpn1[i] = Lap[i] + 1.0;
|
||||
chin1[i] = chi[i] + 1.0;
|
||||
gxx[i] = dxx[i] + 1.0;
|
||||
@@ -96,27 +88,21 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
gzz[i] = dzz[i] + 1.0;
|
||||
}
|
||||
// 9ms //
|
||||
for(int _task=tid; _task<12; _task+=nthr){
|
||||
switch(_task){
|
||||
case 0: fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev); break;
|
||||
case 1: fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev); break;
|
||||
case 2: fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev); break;
|
||||
case 3: fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev); break;
|
||||
case 4: fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev); break;
|
||||
case 5: fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev); break;
|
||||
case 6: fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev); break;
|
||||
case 7: fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev); break;
|
||||
case 8: fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev); break;
|
||||
case 9: fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev); break;
|
||||
case 10: fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev); break;
|
||||
case 11: fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev); break;
|
||||
}
|
||||
}
|
||||
fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev);
|
||||
fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev);
|
||||
fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev);
|
||||
fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev);
|
||||
fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev);
|
||||
fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
|
||||
// 3ms //
|
||||
//omp_barrier(); // 确保所有线程都完成了 betax, betay, betaz 的导数计算
|
||||
#pragma omp barrier
|
||||
for(int i=start;i<end;i+=1){
|
||||
for(int i=0;i<all;i+=1){
|
||||
div_beta[i] = betaxx[i] + betayy[i] + betazz[i];
|
||||
chi_rhs[i] = F2o3 * chin1[i] * (alpn1[i] * trK[i] - div_beta[i]);
|
||||
gxx_rhs[i] = -TWO * alpn1[i] * Axx[i] - F2o3 * gxx[i] * div_beta[i] +
|
||||
@@ -136,7 +122,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
+ gzz[i] * betazx[i] - gxz[i] * betayy[i];
|
||||
}
|
||||
// 1ms //
|
||||
for(int i=start;i<end;i+=1){
|
||||
for(int i=0;i<all;i+=1){
|
||||
double det = gxx[i] * gyy[i] * gzz[i] + gxy[i] * gyz[i] * gxz[i] + gxz[i] * gxy[i] * gyz[i] -
|
||||
gxz[i] * gyy[i] * gxz[i] - gxy[i] * gxy[i] * gzz[i] - gxx[i] * gyz[i] * gyz[i];
|
||||
gupxx[i] = (gyy[i] * gzz[i] - gyz[i] * gyz[i]) / det;
|
||||
@@ -148,7 +134,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
}
|
||||
// 2.2ms //
|
||||
if(co==0){
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
Gmx_Res[i] = Gamx[i] - (
|
||||
gupxx[i] * (gupxx[i]*gxxx[i] + gupxy[i]*gxyx[i] + gupxz[i]*gxzx[i]) +
|
||||
gupxy[i] * (gupxx[i]*gxyx[i] + gupxy[i]*gyyx[i] + gupxz[i]*gyzx[i]) +
|
||||
@@ -193,7 +179,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
}
|
||||
}
|
||||
// 5ms //
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
|
||||
Gamxxx[i] = HALF * ( gupxx[i]*gxxx[i]
|
||||
+ gupxy[i]*(TWO*gxyx[i] - gxxy[i])
|
||||
@@ -269,7 +255,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
|
||||
}
|
||||
// 1.8ms //
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
|
||||
Rxx[i] = gupxx[i]*gupxx[i]*Axx[i]
|
||||
+ gupxy[i]*gupxy[i]*Ayy[i]
|
||||
@@ -314,7 +300,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
+ ( gupyy[i]*gupzz[i] + gupyz[i]*gupyz[i] ) * Ayz[i];
|
||||
}
|
||||
// 4ms //
|
||||
for(int i=start;i<end;i+=1){
|
||||
for(int i=0;i<all;i+=1){
|
||||
Gamx_rhs[i] = - TWO * ( Lapx[i] * Rxx[i] + Lapy[i] * Rxy[i] + Lapz[i] * Rxz[i] ) +
|
||||
TWO * alpn1[i] * (
|
||||
-F3o2/chin1[i] * ( chix[i] * Rxx[i] + chiy[i] * Rxy[i] + chiz[i] * Rxz[i] ) -
|
||||
@@ -345,23 +331,18 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
);
|
||||
}
|
||||
// 22.3ms //
|
||||
for(int _task=tid; _task<6; _task+=nthr){
|
||||
switch(_task){
|
||||
case 0: fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,
|
||||
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev); break;
|
||||
case 1: fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,
|
||||
X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev); break;
|
||||
case 2: fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,
|
||||
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev); break;
|
||||
case 3: fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev); break;
|
||||
case 4: fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev); break;
|
||||
case 5: fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev); break;
|
||||
}
|
||||
}
|
||||
fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,
|
||||
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev);
|
||||
fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,
|
||||
X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
|
||||
fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,
|
||||
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev);
|
||||
fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
|
||||
fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev);
|
||||
|
||||
// 3.5ms //
|
||||
#pragma omp barrier
|
||||
for(int i=start;i<end;i+=1){
|
||||
for(int i=0;i<all;i+=1){
|
||||
fxx[i] = gxxx[i] + gxyy[i] + gxzz[i];
|
||||
fxy[i] = gxyx[i] + gyyy[i] + gyzz[i];
|
||||
fxz[i] = gxzx[i] + gyzy[i] + gzzz[i];
|
||||
@@ -375,7 +356,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
+ TWO * ( gupxy[i]*Gamzxy[i] + gupxz[i]*Gamzxz[i] + gupyz[i]*Gamzyz[i] );
|
||||
}
|
||||
// 3.9ms //
|
||||
for(int i=start;i<end;i+=1){
|
||||
for(int i=0;i<all;i+=1){
|
||||
Gamx_rhs[i] = Gamx_rhs[i]
|
||||
+ F2o3 * Gamxa[i] * div_beta[i]
|
||||
- Gamxa[i] * betaxx[i] - Gamya[i] * betaxy[i] - Gamza[i] * betaxz[i]
|
||||
@@ -398,7 +379,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
+ TWO * ( gupxy[i] * gxyz[i] + gupxz[i] * gxzz[i] + gupyz[i] * gyzz[i] );
|
||||
}
|
||||
// 4.4ms //
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
gxxx[i] = gxx[i]*Gamxxx[i] + gxy[i]*Gamyxx[i] + gxz[i]*Gamzxx[i];
|
||||
gxyx[i] = gxx[i]*Gamxxy[i] + gxy[i]*Gamyxy[i] + gxz[i]*Gamzxy[i];
|
||||
gxzx[i] = gxx[i]*Gamxxz[i] + gxy[i]*Gamyxz[i] + gxz[i]*Gamzxz[i];
|
||||
@@ -421,69 +402,51 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
gzzz[i] = gxz[i]*Gamxzz[i] + gyz[i]*Gamyzz[i] + gzz[i]*Gamzzz[i];
|
||||
}
|
||||
// 22.2ms //
|
||||
if(tid==0){
|
||||
fdderivs(ex,dxx,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
|
||||
}
|
||||
|
||||
// 0.7ms //
|
||||
#pragma omp barrier
|
||||
for (int i = start; i < end; i += 1) {
|
||||
for (int i = 0; i < all; i += 1) {
|
||||
Rxx[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||
+ (gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i]) * TWO;
|
||||
}
|
||||
// 23ms //
|
||||
if(tid==0){
|
||||
fdderivs(ex,dyy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
|
||||
}
|
||||
#pragma omp barrier
|
||||
for (int i = start; i < end; i += 1) {
|
||||
for (int i = 0; i < all; i += 1) {
|
||||
Ryy[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||
+ (gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i]) * TWO;
|
||||
}
|
||||
|
||||
// 23ms //
|
||||
if(tid==0){
|
||||
fdderivs(ex,dzz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
|
||||
}
|
||||
#pragma omp barrier
|
||||
for (int i = start; i < end; i += 1) {
|
||||
for (int i = 0; i < all; i += 1) {
|
||||
Rzz[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||
+ (gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i]) * TWO;
|
||||
}
|
||||
|
||||
// 23ms //
|
||||
if(tid==0){
|
||||
fdderivs(ex,gxy,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI, ANTI,SYM ,Symmetry,Lev);
|
||||
}
|
||||
#pragma omp barrier
|
||||
for (int i = start; i < end; i += 1) {
|
||||
for (int i = 0; i < all; i += 1) {
|
||||
Rxy[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||
+ (gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i]) * TWO;
|
||||
}
|
||||
|
||||
// 23ms //
|
||||
if(tid==0){
|
||||
fdderivs(ex,gxz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,ANTI ,SYM ,ANTI,Symmetry,Lev);
|
||||
}
|
||||
#pragma omp barrier
|
||||
for (int i = start; i < end; i += 1) {
|
||||
for (int i = 0; i < all; i += 1) {
|
||||
Rxz[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||
+ (gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i]) * TWO;
|
||||
}
|
||||
|
||||
// 23ms //
|
||||
if(tid==0){
|
||||
fdderivs(ex,gyz,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM ,ANTI ,ANTI,Symmetry,Lev);
|
||||
}
|
||||
#pragma omp barrier
|
||||
for (int i = start; i < end; i += 1) {
|
||||
for (int i = 0; i < all; i += 1) {
|
||||
Ryz[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||
+ (gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i]) * TWO;
|
||||
}
|
||||
|
||||
// 14ms //
|
||||
/* 假设 all = ex1*ex2*ex3,所有量都是 length=all 的 double 数组(已按同一扁平化规则排布) */
|
||||
for (int i = start; i < end; i += 1) {
|
||||
for (int i = 0; i < all; i += 1) {
|
||||
Rxx[i] =
|
||||
-HALF * Rxx[i]
|
||||
+ gxx[i] * Gamxx[i] + gxy[i] * Gamyx[i] + gxz[i] * Gamzx[i]
|
||||
@@ -735,13 +698,10 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
}
|
||||
|
||||
// 22.3ms //
|
||||
if(tid==0){
|
||||
fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
}
|
||||
|
||||
// 7ms //
|
||||
#pragma omp barrier
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
fxx[i] = fxx[i] - Gamxxx[i] * chix[i] - Gamyxx[i] * chiy[i] - Gamzxx[i] * chiz[i];
|
||||
fxy[i] = fxy[i] - Gamxxy[i] * chix[i] - Gamyxy[i] * chiy[i] - Gamzxy[i] * chiz[i];
|
||||
fxz[i] = fxz[i] - Gamxxz[i] * chix[i] - Gamyxz[i] * chiy[i] - Gamzxz[i] * chiz[i];
|
||||
@@ -765,16 +725,11 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
}
|
||||
|
||||
// 24ms //
|
||||
for(int _task=tid; _task<2; _task+=nthr){
|
||||
switch(_task){
|
||||
case 0: fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev); break;
|
||||
case 1: fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev); break;
|
||||
}
|
||||
}
|
||||
fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
fderivs(ex,chi,dtSfx_rhs,dtSfy_rhs,dtSfz_rhs,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
|
||||
|
||||
// 6ms //
|
||||
#pragma omp barrier
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
/* gxxx,gxxy,gxxz (这里是“升指标后的chi导数/chi”那类量,你沿用原变量名即可) */
|
||||
gxxx[i] = (gupxx[i] * chix[i] + gupxy[i] * chiy[i] + gupxz[i] * chiz[i]) / chin1[i];
|
||||
gxxy[i] = (gupxy[i] * chix[i] + gupyy[i] * chiy[i] + gupyz[i] * chiz[i]) / chin1[i];
|
||||
@@ -815,12 +770,12 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
fyz[i] = fyz[i] - Gamxyz[i] * Lapx[i] - Gamyyz[i] * Lapy[i] - Gamzyz[i] * Lapz[i];
|
||||
}
|
||||
// 1ms //
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
trK_rhs[i] = gupxx[i] * fxx[i] + gupyy[i] * fyy[i] + gupzz[i] * fzz[i]
|
||||
+ TWO * ( gupxy[i] * fxy[i] + gupxz[i] * fxz[i] + gupyz[i] * fyz[i] );
|
||||
}
|
||||
// 2.5ms //
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
|
||||
S[i] = chin1[i] * (
|
||||
gupxx[i] * Sxx[i] + gupyy[i] * Syy[i] + gupzz[i] * Szz[i]
|
||||
@@ -879,7 +834,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
fzz[i] = alpn1[i] * (Rzz[i] - EIGHT * PI * Szz[i]) - fzz[i];
|
||||
}
|
||||
// 8ms //
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
|
||||
/* Aij_rhs = fij - gij * f */
|
||||
Axx_rhs[i] = fxx[i] - gxx[i] * f[i];
|
||||
@@ -1011,7 +966,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
}
|
||||
|
||||
// 1ms //
|
||||
for(int i=start;i<end;i+=1){
|
||||
for(int i=0;i<all;i+=1){
|
||||
betax_rhs[i] = FF * dtSfx[i];
|
||||
betay_rhs[i] = FF * dtSfy[i];
|
||||
betaz_rhs[i] = FF * dtSfz[i];
|
||||
@@ -1036,101 +991,60 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
#endif
|
||||
}
|
||||
// 26ms //
|
||||
for(int _task=tid; _task<16; _task+=nthr){
|
||||
switch(_task){
|
||||
case 0:
|
||||
lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA);
|
||||
break;
|
||||
case 1:
|
||||
lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS);
|
||||
lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
break;
|
||||
case 2:
|
||||
lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA);
|
||||
lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS);
|
||||
break;
|
||||
case 3:
|
||||
lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS);
|
||||
break;
|
||||
case 4:
|
||||
lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA);
|
||||
lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA);
|
||||
break;
|
||||
case 5:
|
||||
lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS);
|
||||
break;
|
||||
case 6:
|
||||
lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS);
|
||||
break;
|
||||
case 7:
|
||||
lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS);
|
||||
lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA);
|
||||
break;
|
||||
case 8: lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA); break;
|
||||
case 9: lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS); break;
|
||||
case 10: lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA); break;
|
||||
case 11: lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS); break;
|
||||
case 12: lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS); break;
|
||||
case 13: lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS); break;
|
||||
case 14: lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS); break;
|
||||
case 15: lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS); break;
|
||||
}
|
||||
}
|
||||
lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA);
|
||||
lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS);
|
||||
lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA);
|
||||
lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS);
|
||||
lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS);
|
||||
lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA);
|
||||
lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA);
|
||||
lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS);
|
||||
lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS);
|
||||
lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS);
|
||||
lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA);
|
||||
lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA);
|
||||
lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA);
|
||||
lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||
lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS);
|
||||
lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS);
|
||||
// 20ms //
|
||||
#pragma omp barrier
|
||||
if(eps>0){
|
||||
for(int _task=tid; _task<16; _task+=nthr){
|
||||
switch(_task){
|
||||
case 0: kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps); break;
|
||||
case 1: kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps); break;
|
||||
case 2: kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps); break;
|
||||
case 3: kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps); break;
|
||||
case 4: kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps); break;
|
||||
case 5: kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps); break;
|
||||
case 6: kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps); break;
|
||||
case 7: kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps); break;
|
||||
case 8:
|
||||
kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps);
|
||||
break;
|
||||
case 9:
|
||||
kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps);
|
||||
break;
|
||||
case 10:
|
||||
kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps);
|
||||
break;
|
||||
case 11:
|
||||
kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps);
|
||||
break;
|
||||
case 12:
|
||||
kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps);
|
||||
break;
|
||||
case 13:
|
||||
kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps);
|
||||
break;
|
||||
case 14:
|
||||
kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps);
|
||||
break;
|
||||
case 15:
|
||||
kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps);
|
||||
break;
|
||||
}
|
||||
}
|
||||
kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps);
|
||||
kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps);
|
||||
}
|
||||
// 2ms //
|
||||
if(co==0){
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
ham_Res[i] =
|
||||
gupxx[i] * Rxx[i] + gupyy[i] * Ryy[i] + gupzz[i] * Rzz[i]
|
||||
+ TWO * ( gupxy[i] * Rxy[i] + gupxz[i] * Rxz[i] + gupyz[i] * Ryz[i] );
|
||||
@@ -1174,20 +1088,15 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
}
|
||||
|
||||
// 1ms //
|
||||
for(int _task=tid; _task<6; _task+=nthr){
|
||||
switch(_task){
|
||||
case 0: fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0); break;
|
||||
case 1: fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0); break;
|
||||
case 2: fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0); break;
|
||||
case 3: fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0); break;
|
||||
case 4: fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0); break;
|
||||
case 5: fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0); break;
|
||||
}
|
||||
}
|
||||
fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
|
||||
fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0);
|
||||
fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0);
|
||||
fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
|
||||
fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0);
|
||||
fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
|
||||
}
|
||||
// 7ms //
|
||||
#pragma omp barrier
|
||||
for (int i=start;i<end;i+=1) {
|
||||
for (int i=0;i<all;i+=1) {
|
||||
gxxx[i] = gxxx[i] - ( Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]
|
||||
+ Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]) - chix[i]*Axx[i]/chin1[i];
|
||||
gxyx[i] = gxyx[i] - ( Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]
|
||||
@@ -1240,10 +1149,7 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
movz_Res[i] = movz_Res[i] - F2o3*Kz[i] - F8*PI*Sz[i];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// double t1 = omp_get_wtime();
|
||||
// printf("Time for BSSN RHS computation: %.6f seconds\n", (t1-t0));
|
||||
|
||||
return 0; // success
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user