Add resident-GA mode switch and simplify sync logic
This commit is contained in:
@@ -3,6 +3,7 @@
|
|||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@@ -54,10 +55,6 @@ using namespace std;
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
static const int k_bssn_cuda_bh_state_indices[3] = {18, 19, 20};
|
static const int k_bssn_cuda_bh_state_indices[3] = {18, 19, 20};
|
||||||
static const int k_bssn_cuda_ga_state_indices[12] = {
|
|
||||||
2, 3, 4, 5, 6, 7,
|
|
||||||
8, 9, 10, 11, 12, 13
|
|
||||||
};
|
|
||||||
|
|
||||||
bool fill_bssn_cuda_views(Block *cg, MyList<var> *vars,
|
bool fill_bssn_cuda_views(Block *cg, MyList<var> *vars,
|
||||||
double **host_views,
|
double **host_views,
|
||||||
@@ -107,20 +104,6 @@ bool bssn_cuda_sync_subset(Block *cg,
|
|||||||
return bssn_cuda_download_state_subset(cg, cg->shape, subset_count, state_indices, host_views) == 0;
|
return bssn_cuda_download_state_subset(cg, cg->shape, subset_count, state_indices, host_views) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool bssn_cuda_sync_ga_fields(Block *cg, MyList<var> *vars, bool upload)
|
|
||||||
{
|
|
||||||
double *ga_fields[12];
|
|
||||||
int idx = 0;
|
|
||||||
while (vars && idx < 12)
|
|
||||||
{
|
|
||||||
ga_fields[idx++] = cg->fgfs[vars->data->sgfn];
|
|
||||||
vars = vars->next;
|
|
||||||
}
|
|
||||||
if (idx != 12)
|
|
||||||
return false;
|
|
||||||
return bssn_cuda_sync_subset(cg, 12, k_bssn_cuda_ga_state_indices, ga_fields, upload);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool bssn_cuda_sync_bh_fields(Block *cg, var *forx, var *fory, var *forz, bool upload)
|
bool bssn_cuda_sync_bh_fields(Block *cg, var *forx, var *fory, var *forz, bool upload)
|
||||||
{
|
{
|
||||||
double *bh_fields[3] = {
|
double *bh_fields[3] = {
|
||||||
@@ -3255,24 +3238,6 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
bool used_gpu_substep = false;
|
bool used_gpu_substep = false;
|
||||||
bool used_gpu_resident_state = false;
|
bool used_gpu_resident_state = false;
|
||||||
#if USE_CUDA_BSSN
|
#if USE_CUDA_BSSN
|
||||||
if (use_cuda_resident_sync)
|
|
||||||
{
|
|
||||||
if (!bssn_cuda_sync_ga_fields(cg, StateList->next->next, false))
|
|
||||||
{
|
|
||||||
cout << "CUDA predictor GA subset download failed" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
f_enforce_ga(cg->shape,
|
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
|
||||||
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
|
|
||||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
|
||||||
if (!bssn_cuda_sync_ga_fields(cg, StateList->next->next, true))
|
|
||||||
{
|
|
||||||
cout << "CUDA predictor GA subset upload failed" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
{
|
||||||
double *state_in[BSSN_CUDA_STATE_COUNT];
|
double *state_in[BSSN_CUDA_STATE_COUNT];
|
||||||
double *state_out[BSSN_CUDA_STATE_COUNT];
|
double *state_out[BSSN_CUDA_STATE_COUNT];
|
||||||
@@ -3292,7 +3257,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
|
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
|
||||||
int apply_enforce_ga = 0;
|
int apply_enforce_ga = 0;
|
||||||
#if (AGM == 0)
|
#if (AGM == 0)
|
||||||
apply_enforce_ga = use_cuda_resident_sync ? 0 : 1;
|
apply_enforce_ga = 1;
|
||||||
#endif
|
#endif
|
||||||
#if (SommerType == 0)
|
#if (SommerType == 0)
|
||||||
#ifndef WithShell
|
#ifndef WithShell
|
||||||
@@ -3706,24 +3671,6 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
bool used_gpu_substep = false;
|
bool used_gpu_substep = false;
|
||||||
bool used_gpu_resident_state = false;
|
bool used_gpu_resident_state = false;
|
||||||
#if USE_CUDA_BSSN
|
#if USE_CUDA_BSSN
|
||||||
if (use_cuda_resident_sync)
|
|
||||||
{
|
|
||||||
if (!bssn_cuda_sync_ga_fields(cg, SynchList_pre->next->next, false))
|
|
||||||
{
|
|
||||||
cout << "CUDA corrector GA subset download failed" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
f_enforce_ga(cg->shape,
|
|
||||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
|
||||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
|
||||||
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
|
|
||||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
|
||||||
if (!bssn_cuda_sync_ga_fields(cg, SynchList_pre->next->next, true))
|
|
||||||
{
|
|
||||||
cout << "CUDA corrector GA subset upload failed" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
{
|
||||||
double *state_in[BSSN_CUDA_STATE_COUNT];
|
double *state_in[BSSN_CUDA_STATE_COUNT];
|
||||||
double *state_out[BSSN_CUDA_STATE_COUNT];
|
double *state_out[BSSN_CUDA_STATE_COUNT];
|
||||||
@@ -3743,9 +3690,9 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
|
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
|
||||||
int apply_enforce_ga = 0;
|
int apply_enforce_ga = 0;
|
||||||
#if (AGM == 0)
|
#if (AGM == 0)
|
||||||
apply_enforce_ga = use_cuda_resident_sync ? 0 : 1;
|
apply_enforce_ga = 1;
|
||||||
#elif (AGM == 1)
|
#elif (AGM == 1)
|
||||||
apply_enforce_ga = (iter_count == 3 && !use_cuda_resident_sync) ? 1 : 0;
|
apply_enforce_ga = use_cuda_resident_sync ? 1 : ((iter_count == 3) ? 1 : 0);
|
||||||
#endif
|
#endif
|
||||||
#if (SommerType == 0)
|
#if (SommerType == 0)
|
||||||
#ifndef WithShell
|
#ifndef WithShell
|
||||||
|
|||||||
@@ -2477,7 +2477,7 @@ __global__ void kern_enforce_ga_cuda(double * __restrict__ dxx,
|
|||||||
- lgxy * lgxy * lgzz
|
- lgxy * lgxy * lgzz
|
||||||
- lgxx * lgyz * lgyz;
|
- lgxx * lgyz * lgyz;
|
||||||
|
|
||||||
lscale = ONE / pow(lscale, F1O3);
|
lscale = ONE / cbrt(lscale);
|
||||||
|
|
||||||
lgxx *= lscale;
|
lgxx *= lscale;
|
||||||
lgxy *= lscale;
|
lgxy *= lscale;
|
||||||
|
|||||||
Reference in New Issue
Block a user