Trim GPU prolong/restrict sync overhead
This commit is contained in:
@@ -2016,25 +2016,6 @@ int bssn_cuda_prolong3_pack(int wei,
|
||||
if (!launch_kernel(grid, block, (const void *)prolong3_cell_kernel, args))
|
||||
return 1;
|
||||
|
||||
cudaError_t sync_err = cudaDeviceSynchronize();
|
||||
if (sync_err != cudaSuccess)
|
||||
{
|
||||
std::fprintf(stderr,
|
||||
"prolong3 debug: symmetry=%d extc=(%d,%d,%d) extf=(%d,%d,%d) "
|
||||
"imino=%d imaxo=%d jmino=%d jmaxo=%d kmino=%d kmaxo=%d "
|
||||
"ic_min=%d ic_max=%d jc_min=%d jc_max=%d kc_min=%d kc_max=%d "
|
||||
"lbc=(%d,%d,%d) lbf=(%d,%d,%d)\n",
|
||||
symmetry,
|
||||
extc[0], extc[1], extc[2],
|
||||
extf[0], extf[1], extf[2],
|
||||
imino, imaxo, jmino, jmaxo, kmino, kmaxo,
|
||||
ic_min, ic_max, jc_min, jc_max, kc_min, kc_max,
|
||||
lbc[0], lbc[1], lbc[2],
|
||||
lbf[0], lbf[1], lbf[2]);
|
||||
report_cuda_error("cudaDeviceSynchronize prolong3", sync_err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int host_error_flag = 0;
|
||||
err = cudaMemcpy(&host_error_flag, cache.error_flag.ptr, sizeof(int), cudaMemcpyDeviceToHost);
|
||||
if (err != cudaSuccess)
|
||||
@@ -2241,28 +2222,6 @@ int bssn_cuda_restrict3_pack(int wei,
|
||||
if (!launch_kernel(grid, block, (const void *)restrict3_cell_kernel, args))
|
||||
return 1;
|
||||
|
||||
cudaError_t sync_err = cudaDeviceSynchronize();
|
||||
if (sync_err != cudaSuccess)
|
||||
{
|
||||
std::fprintf(stderr,
|
||||
"restrict3 debug: symmetry=%d extc=(%d,%d,%d) extf=(%d,%d,%d) "
|
||||
"imino=%d imaxo=%d jmino=%d jmaxo=%d kmino=%d kmaxo=%d "
|
||||
"imini=%d imaxi=%d jmini=%d jmaxi=%d kmini=%d kmaxi=%d "
|
||||
"lbc=(%d,%d,%d) lbf=(%d,%d,%d) "
|
||||
"fi=[%d,%d] fj=[%d,%d] fk=[%d,%d] window=[%d:%d,%d:%d,%d:%d]\n",
|
||||
symmetry,
|
||||
extc[0], extc[1], extc[2],
|
||||
extf[0], extf[1], extf[2],
|
||||
imino, imaxo, jmino, jmaxo, kmino, kmaxo,
|
||||
imini, imaxi, jmini, jmaxi, kmini, kmaxi,
|
||||
lbc[0], lbc[1], lbc[2],
|
||||
lbf[0], lbf[1], lbf[2],
|
||||
fi_min, fi_max, fj_min, fj_max, fk_min, fk_max,
|
||||
ii_lo, ii_hi, jj_lo, jj_hi, kk_lo, kk_hi);
|
||||
report_cuda_error("cudaDeviceSynchronize restrict3", sync_err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int host_error_flag = 0;
|
||||
err = cudaMemcpy(&host_error_flag, cache.error_flag.ptr, sizeof(int), cudaMemcpyDeviceToHost);
|
||||
if (err != cudaSuccess)
|
||||
|
||||
Reference in New Issue
Block a user