From f147f79ffa9902e69f2ef89735522f340dc11d4f Mon Sep 17 00:00:00 2001 From: jaunatisblue Date: Thu, 26 Feb 2026 09:40:46 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9block=E5=88=92=E5=88=86?= =?UTF-8?q?=EF=BC=8C=E5=AF=B9=E8=B4=9F=E8=BD=BD=E9=AB=98=E7=9A=84rank?= =?UTF-8?q?=E6=89=80=E5=9C=A8block=E8=BF=9B=E8=A1=8C=E5=88=92=E5=88=86?= =?UTF-8?q?=EF=BC=8C=E6=B7=BB=E5=8A=A0=E5=88=B0=E7=A9=BArank=EF=BC=8C?= =?UTF-8?q?=E7=A9=BArank=E6=98=AF=E5=B9=B3=E7=A7=BB=E5=BE=97=E5=88=B0?= =?UTF-8?q?=E7=9A=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AMSS_NCKU_source/MPatch.C | 3492 ++++--- AMSS_NCKU_source/NullShellPatch.h | 1 + AMSS_NCKU_source/Parallel.C | 13612 ++++++++++++++------------ AMSS_NCKU_source/Parallel.h | 448 +- AMSS_NCKU_source/cgh.C | 3546 +++---- AMSS_NCKU_source/cgh.h | 199 +- AMSS_NCKU_source/surface_integral.C | 7501 +++++++------- 7 files changed, 14791 insertions(+), 14008 deletions(-) diff --git a/AMSS_NCKU_source/MPatch.C b/AMSS_NCKU_source/MPatch.C index e712a74..b3dc6bd 100644 --- a/AMSS_NCKU_source/MPatch.C +++ b/AMSS_NCKU_source/MPatch.C @@ -1,1762 +1,1732 @@ - -#include -#include -#include -#include -#include -#include -#include -#include -using namespace std; - -#include "misc.h" -#include "MPatch.h" -#include "Parallel.h" -#include "fmisc.h" - -Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi) -{ - - int hbuffer_width = buffer_width; - if (lev == 0) - hbuffer_width = CS_width; // specific for shell-box coulping - - if (DIM != dim) - { - cout << "dimension is not consistent in Patch construction" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - for (int i = 0; i < dim; i++) - { - shape[i] = shapei[i]; - bbox[i] = bboxi[i]; - bbox[dim + i] = bboxi[dim + i]; - lli[i] = uui[i] = 0; - if (buflog) - { - double DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH = (bbox[dim + i] - bbox[i]) / (shape[i] - 1); -#else 
-#ifdef Cell - DH = (bbox[dim + i] - bbox[i]) / shape[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - uui[i] = hbuffer_width; - bbox[dim + i] = bbox[dim + i] + uui[i] * DH; - shape[i] = shape[i] + uui[i]; - } - } - - if (buflog) - { - if (DIM != 3) - { - cout << "Symmetry in Patch construction only support 3 yet but dim = " << DIM << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - double tmpb, DH; - if (Symmetry > 0) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH = (bbox[5] - bbox[2]) / (shape[2] - 1); -#else -#ifdef Cell - DH = (bbox[5] - bbox[2]) / shape[2]; -#else -#error Not define Vertex nor Cell -#endif -#endif - tmpb = Mymax(0, bbox[2] - hbuffer_width * DH); - lli[2] = int((bbox[2] - tmpb) / DH + 0.4); - bbox[2] = bbox[2] - lli[2] * DH; - shape[2] = shape[2] + lli[2]; - if (lli[2] < hbuffer_width) - { - if (feq(bbox[2], 0, DH / 2)) - lli[2] = 0; - else - { - cout << "Code mistake for lli[2] = " << lli[2] << ", bbox[2] = " << bbox[2] << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - if (Symmetry > 1) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH = (bbox[3] - bbox[0]) / (shape[0] - 1); -#else -#ifdef Cell - DH = (bbox[3] - bbox[0]) / shape[0]; -#else -#error Not define Vertex nor Cell -#endif -#endif - tmpb = Mymax(0, bbox[0] - hbuffer_width * DH); - lli[0] = int((bbox[0] - tmpb) / DH + 0.4); - bbox[0] = bbox[0] - lli[0] * DH; - shape[0] = shape[0] + lli[0]; - if (lli[0] < hbuffer_width) - { - if (feq(bbox[0], 0, DH / 2)) - lli[0] = 0; - else - { - cout << "Code mistake for lli[0] = " << lli[0] << ", bbox[0] = " << bbox[0] << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH = (bbox[4] - bbox[1]) / (shape[1] - 1); -#else -#ifdef Cell - DH = (bbox[4] - bbox[1]) / shape[1]; -#else -#error Not define Vertex nor Cell -#endif -#endif - tmpb = Mymax(0, bbox[1] - hbuffer_width * DH); - lli[1] = 
int((bbox[1] - tmpb) / DH + 0.4); - bbox[1] = bbox[1] - lli[1] * DH; - shape[1] = shape[1] + lli[1]; - if (lli[1] < hbuffer_width) - { - if (feq(bbox[1], 0, DH / 2)) - lli[1] = 0; - else - { - cout << "Code mistake for lli[1] = " << lli[1] << ", bbox[1] = " << bbox[1] << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - } - else - { - for (int i = 0; i < 2; i++) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH = (bbox[dim + i] - bbox[i]) / (shape[i] - 1); -#else -#ifdef Cell - DH = (bbox[dim + i] - bbox[i]) / shape[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - lli[i] = hbuffer_width; - bbox[i] = bbox[i] - lli[i] * DH; - shape[i] = shape[i] + lli[i]; - } - } - } - else - { - for (int i = 0; i < dim; i++) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH = (bbox[dim + i] - bbox[i]) / (shape[i] - 1); -#else -#ifdef Cell - DH = (bbox[dim + i] - bbox[i]) / shape[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - lli[i] = hbuffer_width; - bbox[i] = bbox[i] - lli[i] * DH; - shape[i] = shape[i] + lli[i]; - } - } - } - - blb = ble = 0; -} -Patch::~Patch() -{ -} -// buflog 1: with buffer points; 0 without -void Patch::checkPatch(bool buflog) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == 0) - { - if (buflog) - { - cout << " belong to level " << lev << endl; - cout << " shape: ["; - for (int i = 0; i < dim; i++) - { - cout << shape[i]; - if (i < dim - 1) - cout << ","; - else - cout << "]"; - } - cout << " resolution: ["; - for (int i = 0; i < dim; i++) - { - cout << getdX(i); - if (i < dim - 1) - cout << ","; - else - cout << "]" << endl; - } - cout << " range:" << "("; - for (int i = 0; i < dim; i++) - { - cout << bbox[i] << ":" << bbox[dim + i]; - if (i < dim - 1) - cout << ","; - else - cout << ")" << endl; - } - } - else - { - cout << " belong to level " << lev << endl; - cout << " shape: ["; - for (int i = 0; i < dim; i++) - { - cout << 
shape[i] - lli[i] - uui[i]; - if (i < dim - 1) - cout << ","; - else - cout << "]"; - } - cout << " resolution: ["; - for (int i = 0; i < dim; i++) - { - cout << getdX(i); - if (i < dim - 1) - cout << ","; - else - cout << "]" << endl; - } - cout << " range:" << "("; - for (int i = 0; i < dim; i++) - { - cout << bbox[i] + lli[i] * getdX(i) << ":" << bbox[dim + i] - uui[i] * getdX(i); - if (i < dim - 1) - cout << ","; - else - cout << ")" << endl; - } - } - } -} -void Patch::checkPatch(bool buflog, const int out_rank) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == out_rank) - { - cout << " out_rank = " << out_rank << endl; - if (buflog) - { - cout << " belong to level " << lev << endl; - cout << " shape: ["; - for (int i = 0; i < dim; i++) - { - cout << shape[i]; - if (i < dim - 1) - cout << ","; - else - cout << "]"; - } - cout << " resolution: ["; - for (int i = 0; i < dim; i++) - { - cout << getdX(i); - if (i < dim - 1) - cout << ","; - else - cout << "]" << endl; - } - cout << " range:" << "("; - for (int i = 0; i < dim; i++) - { - cout << bbox[i] << ":" << bbox[dim + i]; - if (i < dim - 1) - cout << ","; - else - cout << ")" << endl; - } - } - else - { - cout << " belong to level " << lev << endl; - cout << " shape: ["; - for (int i = 0; i < dim; i++) - { - cout << shape[i] - lli[i] - uui[i]; - if (i < dim - 1) - cout << ","; - else - cout << "]"; - } - cout << " resolution: ["; - for (int i = 0; i < dim; i++) - { - cout << getdX(i); - if (i < dim - 1) - cout << ","; - else - cout << "]" << endl; - } - cout << " range:" << "("; - for (int i = 0; i < dim; i++) - { - cout << bbox[i] + lli[i] * getdX(i) << ":" << bbox[dim + i] - uui[i] * getdX(i); - if (i < dim - 1) - cout << ","; - else - cout << ")" << endl; - } - } - } -} -void Patch::Interp_Points(MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetry) -{ - // NOTE: we do not Synchnize variables here, make sure of that before calling this routine - double t_calc_end, 
t_calc_total = 0; - double t_calc_start = MPI_Wtime(); - int myrank, nprocs; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - - int ordn = 2 * ghost_width; - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - memset(Shellf, 0, sizeof(double) * NN * num_var); - - // owner_rank[j] records which MPI rank owns point j - // All ranks traverse the same block list so they all agree on ownership - int *owner_rank; - owner_rank = new int[NN]; - for (int j = 0; j < NN; j++) - owner_rank[j] = -1; - - double DH[dim], llb[dim], uub[dim]; - for (int i = 0; i < dim; i++) - DH[i] = getdX(i); - - for (int j = 0; j < NN; j++) // run along points - { - double pox[dim]; - for (int i = 0; i < dim; i++) - { - pox[i] = XX[i][j]; - if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) - { - cout << "Patch::Interp_Points: point ("; - for (int k = 0; k < dim; k++) - { - cout << XX[k][j]; - if (k < dim - 1) - cout << ","; - else - cout << ") is out of current Patch." << endl; - } - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - MyList *Bp = blb; - bool notfind = true; - while (notfind && Bp) // run along Blocks - { - Block *BP = Bp->data; - - bool flag = true; - for (int i = 0; i < dim; i++) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; -#else -#ifdef Cell - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? 
BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2) - { - flag = false; - break; - } - } - - if (flag) - { - notfind = false; - owner_rank[j] = BP->rank; - if (myrank == BP->rank) - { - //---> interpolation - varl = VarList; - int k = 0; - while (varl) // run along variables - { - f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], - pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); - varl = varl->next; - k++; - } - } - } - if (Bp == ble) - break; - Bp = Bp->next; - } - } - t_calc_end = MPI_Wtime(); - t_calc_total = t_calc_end - t_calc_start; - // Replace MPI_Allreduce with per-owner MPI_Bcast: - // Group consecutive points by owner rank and broadcast each group. - // Since each point's data is non-zero only on the owner rank, - // Bcast from owner is equivalent to Allreduce(MPI_SUM) but much cheaper. 
- { - int j = 0; - while (j < NN) - { - int cur_owner = owner_rank[j]; - if (cur_owner < 0) - { - if (myrank == 0) - { - cout << "ERROR: Patch::Interp_Points fails to find point ("; - for (int d = 0; d < dim; d++) - { - cout << XX[d][j]; - if (d < dim - 1) - cout << ","; - else - cout << ")"; - } - cout << " on Patch ("; - for (int d = 0; d < dim; d++) - { - cout << bbox[d] << "+" << lli[d] * DH[d]; - if (d < dim - 1) - cout << ","; - else - cout << ")--"; - } - cout << "("; - for (int d = 0; d < dim; d++) - { - cout << bbox[dim + d] << "-" << uui[d] * DH[d]; - if (d < dim - 1) - cout << ","; - else - cout << ")" << endl; - } - MPI_Abort(MPI_COMM_WORLD, 1); - } - j++; - continue; - } - // Find contiguous run of points with the same owner - int jstart = j; - while (j < NN && owner_rank[j] == cur_owner) - j++; - int count = (j - jstart) * num_var; - MPI_Bcast(Shellf + jstart * num_var, count, MPI_DOUBLE, cur_owner, MPI_COMM_WORLD); - } - } - - delete[] owner_rank; - - - - // 4. 汇总并输出真正干活最慢的 Top 10 - struct RankStats { - int rank; - double calc_time; // 净计算时间 - double comm_time; // 等待时间 - }; - - // 创建当前进程的统计数据 - RankStats local_stat; - local_stat.rank = myrank; - local_stat.calc_time = t_calc_total; - local_stat.comm_time = 0; // 此函数中未跟踪通信时间 - - // 为所有进程的统计数据分配内存 - RankStats *all_stats = nullptr; - if (myrank == 0) { - all_stats = new RankStats[nprocs]; - } - - // 使用MPI_Gather收集所有进程的数据到rank 0 - MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE, - all_stats, sizeof(RankStats), MPI_BYTE, - 0, MPI_COMM_WORLD); - - if (myrank == 0) { - // 按 calc_time(净计算时间)排序 - std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) { - return a.calc_time > b.calc_time; - }); - - printf("\n--- Top 10 Ranks by ACTIVE COMPUTATION (CPU Time) ---\n"); - int display_count = (nprocs < 10) ? 
nprocs : 10; - for (int i = 0; i < display_count; i++) { - printf("Rank [%4d]: Calc %.6f s\n", - all_stats[i].rank, all_stats[i].calc_time); - } - - // 清理分配的内存 - delete[] all_stats; - } -} -void Patch::Interp_Points(MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetry, - int Nmin_consumer, int Nmax_consumer) -{ - // Targeted point-to-point overload: each owner sends each point only to - // the one rank that needs it for integration (consumer), reducing - // communication volume by ~nprocs times compared to the Bcast version. - double t_calc_end, t_calc_total = 0; - double t_calc_start = MPI_Wtime(); - int myrank, nprocs; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - - int ordn = 2 * ghost_width; - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - memset(Shellf, 0, sizeof(double) * NN * num_var); - - // owner_rank[j] records which MPI rank owns point j - int *owner_rank; - owner_rank = new int[NN]; - for (int j = 0; j < NN; j++) - owner_rank[j] = -1; - - double DH[dim], llb[dim], uub[dim]; - for (int i = 0; i < dim; i++) - DH[i] = getdX(i); - - // --- Interpolation phase (identical to original) --- - for (int j = 0; j < NN; j++) - { - double pox[dim]; - for (int i = 0; i < dim; i++) - { - pox[i] = XX[i][j]; - if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) - { - cout << "Patch::Interp_Points: point ("; - for (int k = 0; k < dim; k++) - { - cout << XX[k][j]; - if (k < dim - 1) - cout << ","; - else - cout << ") is out of current Patch." << endl; - } - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - MyList *Bp = blb; - bool notfind = true; - while (notfind && Bp) - { - Block *BP = Bp->data; - - bool flag = true; - for (int i = 0; i < dim; i++) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? 
BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; -#else -#ifdef Cell - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2) - { - flag = false; - break; - } - } - - if (flag) - { - notfind = false; - owner_rank[j] = BP->rank; - if (myrank == BP->rank) - { - varl = VarList; - int k = 0; - while (varl) - { - f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], - pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); - varl = varl->next; - k++; - } - } - } - if (Bp == ble) - break; - Bp = Bp->next; - } - } - t_calc_end = MPI_Wtime(); - t_calc_total = t_calc_end - t_calc_start; - // --- Error check for unfound points --- - for (int j = 0; j < NN; j++) - { - if (owner_rank[j] < 0 && myrank == 0) - { - cout << "ERROR: Patch::Interp_Points fails to find point ("; - for (int d = 0; d < dim; d++) - { - cout << XX[d][j]; - if (d < dim - 1) - cout << ","; - else - cout << ")"; - } - cout << " on Patch ("; - for (int d = 0; d < dim; d++) - { - cout << bbox[d] << "+" << lli[d] * DH[d]; - if (d < dim - 1) - cout << ","; - else - cout << ")--"; - } - cout << "("; - for (int d = 0; d < dim; d++) - { - cout << bbox[dim + d] << "-" << uui[d] * DH[d]; - if (d < dim - 1) - cout << ","; - else - cout << ")" << endl; - } - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - // --- Targeted point-to-point communication phase --- - // Compute consumer_rank[j] using the same deterministic formula as surface_integral - int *consumer_rank = 
new int[NN]; - { - int mp = NN / nprocs; - int Lp = NN - nprocs * mp; - for (int j = 0; j < NN; j++) - { - if (j < Lp * (mp + 1)) - consumer_rank[j] = j / (mp + 1); - else - consumer_rank[j] = Lp + (j - Lp * (mp + 1)) / mp; - } - } - - // Count sends and recvs per rank - int *send_count = new int[nprocs]; - int *recv_count = new int[nprocs]; - memset(send_count, 0, sizeof(int) * nprocs); - memset(recv_count, 0, sizeof(int) * nprocs); - - for (int j = 0; j < NN; j++) - { - int own = owner_rank[j]; - int con = consumer_rank[j]; - if (own == con) - continue; // local — no communication needed - if (own == myrank) - send_count[con]++; - if (con == myrank) - recv_count[own]++; - } - - // Build send buffers: for each destination rank, pack (index, data) pairs - // Each entry: 1 int (point index j) + num_var doubles - int total_send = 0, total_recv = 0; - int *send_offset = new int[nprocs]; - int *recv_offset = new int[nprocs]; - for (int r = 0; r < nprocs; r++) - { - send_offset[r] = total_send; - total_send += send_count[r]; - recv_offset[r] = total_recv; - total_recv += recv_count[r]; - } - - // Pack send buffers: each message contains (j, data[0..num_var-1]) per point - int stride = 1 + num_var; // 1 double for index + num_var doubles for data - double *sendbuf = new double[total_send * stride]; - double *recvbuf = new double[total_recv * stride]; - - // Temporary counters for packing - int *pack_pos = new int[nprocs]; - memset(pack_pos, 0, sizeof(int) * nprocs); - - for (int j = 0; j < NN; j++) - { - int own = owner_rank[j]; - int con = consumer_rank[j]; - if (own != myrank || con == myrank) - continue; - int pos = (send_offset[con] + pack_pos[con]) * stride; - sendbuf[pos] = (double)j; // point index - for (int v = 0; v < num_var; v++) - sendbuf[pos + 1 + v] = Shellf[j * num_var + v]; - pack_pos[con]++; - } - - // Post non-blocking recvs and sends - int n_req = 0; - for (int r = 0; r < nprocs; r++) - { - if (recv_count[r] > 0) n_req++; - if (send_count[r] > 0) 
n_req++; - } - - MPI_Request *reqs = new MPI_Request[n_req]; - int req_idx = 0; - - for (int r = 0; r < nprocs; r++) - { - if (recv_count[r] > 0) - { - MPI_Irecv(recvbuf + recv_offset[r] * stride, - recv_count[r] * stride, MPI_DOUBLE, - r, 0, MPI_COMM_WORLD, &reqs[req_idx++]); - } - } - for (int r = 0; r < nprocs; r++) - { - if (send_count[r] > 0) - { - MPI_Isend(sendbuf + send_offset[r] * stride, - send_count[r] * stride, MPI_DOUBLE, - r, 0, MPI_COMM_WORLD, &reqs[req_idx++]); - } - } - - if (n_req > 0) - MPI_Waitall(n_req, reqs, MPI_STATUSES_IGNORE); - - // Unpack recv buffers into Shellf - for (int i = 0; i < total_recv; i++) - { - int pos = i * stride; - int j = (int)recvbuf[pos]; - for (int v = 0; v < num_var; v++) - Shellf[j * num_var + v] = recvbuf[pos + 1 + v]; - } - - delete[] reqs; - delete[] sendbuf; - delete[] recvbuf; - delete[] pack_pos; - delete[] send_offset; - delete[] recv_offset; - delete[] send_count; - delete[] recv_count; - delete[] consumer_rank; - delete[] owner_rank; - - // 4. 汇总并输出真正干活最慢的 Top 10 - struct RankStats { - int rank; - double calc_time; // 净计算时间 - double comm_time; // 等待时间 - }; - - // 创建当前进程的统计数据 - RankStats local_stat; - local_stat.rank = myrank; - local_stat.calc_time = t_calc_total; - local_stat.comm_time = 0; // 此函数中未跟踪通信时间 - - // 为所有进程的统计数据分配内存 - RankStats *all_stats = nullptr; - if (myrank == 0) { - all_stats = new RankStats[nprocs]; - } - - // 使用MPI_Gather收集所有进程的数据到rank 0 - MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE, - all_stats, sizeof(RankStats), MPI_BYTE, - 0, MPI_COMM_WORLD); - - if (myrank == 0) { - // 按 calc_time(净计算时间)排序 - std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) { - return a.calc_time > b.calc_time; - }); -/* - printf("\n--- Top 10 Ranks by ACTIVE COMPUTATION (CPU Time) ---\n"); - int display_count = (nprocs < 10) ? 
nprocs : 10; - for (int i = 0; i < display_count; i++) { - printf("Rank [%4d]: Calc %.6f s\n", - all_stats[i].rank, all_stats[i].calc_time); - }*/ - - // 清理分配的内存 - delete[] all_stats; - } - -} -void Patch::Interp_Points(MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetry, MPI_Comm Comm_here) -{ - // NOTE: we do not Synchnize variables here, make sure of that before calling this routine - int myrank, lmyrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - MPI_Comm_rank(Comm_here, &lmyrank); - - int ordn = 2 * ghost_width; - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - memset(Shellf, 0, sizeof(double) * NN * num_var); - - // owner_rank[j] stores the global rank that owns point j - int *owner_rank; - owner_rank = new int[NN]; - for (int j = 0; j < NN; j++) - owner_rank[j] = -1; - - // Build global-to-local rank translation for Comm_here - MPI_Group world_group, local_group; - MPI_Comm_group(MPI_COMM_WORLD, &world_group); - MPI_Comm_group(Comm_here, &local_group); - - double DH[dim], llb[dim], uub[dim]; - for (int i = 0; i < dim; i++) - DH[i] = getdX(i); - - for (int j = 0; j < NN; j++) // run along points - { - double pox[dim]; - for (int i = 0; i < dim; i++) - { - pox[i] = XX[i][j]; - if (lmyrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) - { - cout << "Patch::Interp_Points: point ("; - for (int k = 0; k < dim; k++) - { - cout << XX[k][j]; - if (k < dim - 1) - cout << ","; - else - cout << ") is out of current Patch." << endl; - } - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - MyList *Bp = blb; - bool notfind = true; - while (notfind && Bp) // run along Blocks - { - Block *BP = Bp->data; - - bool flag = true; - for (int i = 0; i < dim; i++) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? 
BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; -#else -#ifdef Cell - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2) - { - flag = false; - break; - } - } - - if (flag) - { - notfind = false; - owner_rank[j] = BP->rank; - if (myrank == BP->rank) - { - //---> interpolation - varl = VarList; - int k = 0; - while (varl) // run along variables - { - f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], - pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); - varl = varl->next; - k++; - } - } - } - if (Bp == ble) - break; - Bp = Bp->next; - } - } - - // Collect unique global owner ranks and translate to local ranks in Comm_here - // Then broadcast each owner's points via MPI_Bcast on Comm_here - { - int j = 0; - while (j < NN) - { - int cur_owner_global = owner_rank[j]; - if (cur_owner_global < 0) - { - // Point not found — skip (error check disabled for sub-communicator levels) - j++; - continue; - } - // Translate global rank to local rank in Comm_here - int cur_owner_local; - MPI_Group_translate_ranks(world_group, 1, &cur_owner_global, local_group, &cur_owner_local); - - // Find contiguous run of points with the same owner - int jstart = j; - while (j < NN && owner_rank[j] == cur_owner_global) - j++; - int count = (j - jstart) * num_var; - MPI_Bcast(Shellf + jstart * num_var, count, MPI_DOUBLE, cur_owner_local, Comm_here); - } - } - - MPI_Group_free(&world_group); - MPI_Group_free(&local_group); - 
delete[] owner_rank; -} -void Patch::checkBlock() -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == 0) - { - MyList *BP = blb; - while (BP) - { - BP->data->checkBlock(); - if (BP == ble) - break; - BP = BP->next; - } - } -} -double Patch::getdX(int dir) -{ - if (dir < 0 || dir >= dim) - { - cout << "Patch::getdX: error input dir = " << dir << ", this Patch has direction (0," << dim - 1 << ")" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - double h; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - if (shape[dir] == 1) - { - cout << "Patch::getdX: for direction " << dir << ", this Patch has only one point. Can not determine dX for vertex center grid." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - h = (bbox[dim + dir] - bbox[dir]) / (shape[dir] - 1); -#else -#ifdef Cell - h = (bbox[dim + dir] - bbox[dir]) / shape[dir]; -#else -#error Not define Vertex nor Cell -#endif -#endif - return h; -} -bool Patch::Interp_ONE_Point(MyList *VarList, double *XX, - double *Shellf, int Symmetry) -{ - // NOTE: we do not Synchnize variables here, make sure of that before calling this routine - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - int ordn = 2 * ghost_width; - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - double *shellf; - shellf = new double[num_var]; - memset(shellf, 0, sizeof(double) * num_var); - - double *DH, *llb, *uub; - DH = new double[dim]; - - for (int i = 0; i < dim; i++) - { - DH[i] = getdX(i); - } - llb = new double[dim]; - uub = new double[dim]; - - double pox[dim]; - for (int i = 0; i < dim; i++) - { - pox[i] = XX[i]; - // has excluded the buffer points - if (XX[i] < bbox[i] + lli[i] * DH[i] - DH[i] / 100 || XX[i] > bbox[dim + i] - uui[i] * DH[i] + DH[i] / 100) - { - delete[] shellf; - delete[] DH; - delete[] llb; - delete[] uub; - return false; // out of current patch, - // remember to delete the allocated arrays before return!!! 
- } - } - - MyList *Bp = blb; - bool notfind = true; - while (notfind && Bp) // run along Blocks - { - Block *BP = Bp->data; - - bool flag = true; - for (int i = 0; i < dim; i++) - { -// NOTE: our dividing structure is (exclude ghost) -// -1 0 -// 1 2 -// so (0,1) does not belong to any part for vertex structure -// here we put (0,0.5) to left part and (0.5,1) to right part -// BUT for cell structure the bbox is (-1.5,0.5) and (0.5,2.5), there is no missing region at all -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; -#else -#ifdef Cell - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - if (XX[i] - llb[i] < -DH[i] / 2 || XX[i] - uub[i] > DH[i] / 2) - { - flag = false; - break; - } - } - - if (flag) - { - notfind = false; - if (myrank == BP->rank) - { -// test old code -#if 0 -#define floorint(a) ((a) < 0 ? 
int(a) - 1 : int(a)) -//---> interpolation - int ixl,iyl,izl,ixu,iyu,izu; - double Delx,Dely,Delz; - - ixl = 1+floorint((pox[0]-BP->X[0][0])/DH[0]); - iyl = 1+floorint((pox[1]-BP->X[1][0])/DH[1]); - izl = 1+floorint((pox[2]-BP->X[2][0])/DH[2]); - - int nn=ordn/2; - - ixl = ixl-nn; - iyl = iyl-nn; - izl = izl-nn; - - int tmi; - tmi = (Symmetry==2)?-1:0; - if(ixl0)?-1:0; - if(izlBP->shape[0]) ixl=BP->shape[0]-ordn; - if(iyl+ordn>BP->shape[1]) iyl=BP->shape[1]-ordn; - if(izl+ordn>BP->shape[2]) izl=BP->shape[2]-ordn; -// support cell center - if(ixl>=0) Delx = ( pox[0] - BP->X[0][ixl] )/ DH[0]; - else Delx = ( pox[0] + BP->X[0][0] )/ DH[0]; - if(iyl>=0) Dely = ( pox[1] - BP->X[1][iyl] )/ DH[1]; - else Dely = ( pox[1] + BP->X[1][0] )/ DH[1]; - if(izl>=0) Delz = ( pox[2] - BP->X[2][izl] )/ DH[2]; - else Delz = ( pox[2] + BP->X[2][0] )/ DH[2]; -//change to fortran index - ixl++; - iyl++; - izl++; - ixu = ixl + ordn - 1; - iyu = iyl + ordn - 1; - izu = izl + ordn - 1; - varl=VarList; - int j=0; - while(varl) - { - f_interp_2(BP->shape,BP->fgfs[varl->data->sgfn],shellf[j],ixl,ixu,iyl,iyu,izl,izu,Delx,Dely,Delz, - ordn,varl->data->SoA,Symmetry); - varl=varl->next; - j++; - } //varl -#else - //---> interpolation - varl = VarList; - int k = 0; - while (varl) // run along variables - { - // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], - // pox,ordn,varl->data->SoA,Symmetry); - f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k], - pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); - varl = varl->next; - k++; - } -#endif - } - } - if (Bp == ble) - break; - Bp = Bp->next; - } - - if (notfind && myrank == 0) - { - cout << "ERROR: Patch::Interp_Points fails to find point ("; - for (int j = 0; j < dim; j++) - { - cout << XX[j]; - if (j < dim - 1) - cout << ","; - else - cout << ")"; - } - cout << " on Patch ("; - for (int j = 0; j < dim; j++) - { - cout << bbox[j] << "+" << lli[j] * 
getdX(j); - if (j < dim - 1) - cout << ","; - else - cout << ")--"; - } - cout << "("; - for (int j = 0; j < dim; j++) - { - cout << bbox[dim + j] << "-" << uui[j] * getdX(j); - if (j < dim - 1) - cout << ","; - else - cout << ")" << endl; - } -#if 0 - checkBlock(); -#else - cout << "splited domains:" << endl; - { - MyList *Bp = blb; - while (Bp) - { - Block *BP = Bp->data; - - for (int i = 0; i < dim; i++) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; -#else -#ifdef Cell - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - } - cout << "("; - for (int j = 0; j < dim; j++) - { - cout << llb[j] << ":" << uub[j]; - if (j < dim - 1) - cout << ","; - else - cout << ")" << endl; - } - if (Bp == ble) - break; - Bp = Bp->next; - } - } -#endif - MPI_Abort(MPI_COMM_WORLD, 1); - } - - MPI_Allreduce(shellf, Shellf, num_var, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - delete[] shellf; - delete[] DH; - delete[] llb; - delete[] uub; - - return true; -} -bool Patch::Interp_ONE_Point(MyList *VarList, double *XX, - double *Shellf, int Symmetry, MPI_Comm Comm_here) -{ - // NOTE: we do not Synchnize variables here, make sure of that before calling this routine - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - int ordn = 2 * ghost_width; - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - double *shellf; - shellf = new double[num_var]; - 
memset(shellf, 0, sizeof(double) * num_var); - - double *DH, *llb, *uub; - DH = new double[dim]; - - for (int i = 0; i < dim; i++) - { - DH[i] = getdX(i); - } - llb = new double[dim]; - uub = new double[dim]; - - double pox[dim]; - for (int i = 0; i < dim; i++) - { - pox[i] = XX[i]; - // has excluded the buffer points - if (XX[i] < bbox[i] + lli[i] * DH[i] - DH[i] / 100 || XX[i] > bbox[dim + i] - uui[i] * DH[i] + DH[i] / 100) - { - delete[] shellf; - delete[] DH; - delete[] llb; - delete[] uub; - return false; // out of current patch, - // remember to delete the allocated arrays before return!!! - } - } - - MyList *Bp = blb; - bool notfind = true; - while (notfind && Bp) // run along Blocks - { - Block *BP = Bp->data; - - bool flag = true; - for (int i = 0; i < dim; i++) - { -// NOTE: our dividing structure is (exclude ghost) -// -1 0 -// 1 2 -// so (0,1) does not belong to any part for vertex structure -// here we put (0,0.5) to left part and (0.5,1) to right part -// BUT for cell structure the bbox is (-1.5,0.5) and (0.5,2.5), there is no missing region at all -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; -#else -#ifdef Cell - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - if (XX[i] - llb[i] < -DH[i] / 2 || XX[i] - uub[i] > DH[i] / 2) - { - flag = false; - break; - } - } - - if (flag) - { - notfind = false; - if (myrank == BP->rank) - { -// test old code -#if 0 -#define floorint(a) ((a) < 0 ? 
int(a) - 1 : int(a)) -//---> interpolation - int ixl,iyl,izl,ixu,iyu,izu; - double Delx,Dely,Delz; - - ixl = 1+floorint((pox[0]-BP->X[0][0])/DH[0]); - iyl = 1+floorint((pox[1]-BP->X[1][0])/DH[1]); - izl = 1+floorint((pox[2]-BP->X[2][0])/DH[2]); - - int nn=ordn/2; - - ixl = ixl-nn; - iyl = iyl-nn; - izl = izl-nn; - - int tmi; - tmi = (Symmetry==2)?-1:0; - if(ixl0)?-1:0; - if(izlBP->shape[0]) ixl=BP->shape[0]-ordn; - if(iyl+ordn>BP->shape[1]) iyl=BP->shape[1]-ordn; - if(izl+ordn>BP->shape[2]) izl=BP->shape[2]-ordn; -// support cell center - if(ixl>=0) Delx = ( pox[0] - BP->X[0][ixl] )/ DH[0]; - else Delx = ( pox[0] + BP->X[0][0] )/ DH[0]; - if(iyl>=0) Dely = ( pox[1] - BP->X[1][iyl] )/ DH[1]; - else Dely = ( pox[1] + BP->X[1][0] )/ DH[1]; - if(izl>=0) Delz = ( pox[2] - BP->X[2][izl] )/ DH[2]; - else Delz = ( pox[2] + BP->X[2][0] )/ DH[2]; -//change to fortran index - ixl++; - iyl++; - izl++; - ixu = ixl + ordn - 1; - iyu = iyl + ordn - 1; - izu = izl + ordn - 1; - varl=VarList; - int j=0; - while(varl) - { - f_interp_2(BP->shape,BP->fgfs[varl->data->sgfn],shellf[j],ixl,ixu,iyl,iyu,izl,izu,Delx,Dely,Delz, - ordn,varl->data->SoA,Symmetry); - varl=varl->next; - j++; - } //varl -#else - //---> interpolation - varl = VarList; - int k = 0; - while (varl) // run along variables - { - // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], - // pox,ordn,varl->data->SoA,Symmetry); - f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k], - pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); - varl = varl->next; - k++; - } -#endif - } - } - if (Bp == ble) - break; - Bp = Bp->next; - } - - if (notfind && myrank == 0) - { - cout << "ERROR: Patch::Interp_Points fails to find point ("; - for (int j = 0; j < dim; j++) - { - cout << XX[j]; - if (j < dim - 1) - cout << ","; - else - cout << ")"; - } - cout << " on Patch ("; - for (int j = 0; j < dim; j++) - { - cout << bbox[j] << "+" << lli[j] * 
getdX(j); - if (j < dim - 1) - cout << ","; - else - cout << ")--"; - } - cout << "("; - for (int j = 0; j < dim; j++) - { - cout << bbox[dim + j] << "-" << uui[j] * getdX(j); - if (j < dim - 1) - cout << ","; - else - cout << ")" << endl; - } -#if 0 - checkBlock(); -#else - cout << "splited domains:" << endl; - { - MyList *Bp = blb; - while (Bp) - { - Block *BP = Bp->data; - - for (int i = 0; i < dim; i++) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; -#else -#ifdef Cell - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - } - cout << "("; - for (int j = 0; j < dim; j++) - { - cout << llb[j] << ":" << uub[j]; - if (j < dim - 1) - cout << ","; - else - cout << ")" << endl; - } - if (Bp == ble) - break; - Bp = Bp->next; - } - } -#endif - MPI_Abort(MPI_COMM_WORLD, 1); - } - - MPI_Allreduce(shellf, Shellf, num_var, MPI_DOUBLE, MPI_SUM, Comm_here); - - delete[] shellf; - delete[] DH; - delete[] llb; - delete[] uub; - - return true; -} -// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs -void Patch::Find_Maximum(MyList *VarList, double *XX, - double *Shellf) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - double *shellf, *xx; - shellf = new double[num_var]; - xx = new double[dim * num_var]; - memset(shellf, 0, sizeof(double) * 
num_var); - memset(xx, 0, sizeof(double) * dim * num_var); - - double *DH; - int *llb, *uub; - DH = new double[dim]; - - for (int i = 0; i < dim; i++) - { - DH[i] = getdX(i); - } - - llb = new int[dim]; - uub = new int[dim]; - - MyList *Bp = blb; - while (Bp) // run along Blocks - { - Block *BP = Bp->data; - - if (myrank == BP->rank) - { - - for (int i = 0; i < dim; i++) - { - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? lli[i] : ghost_width; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? uui[i] : ghost_width; - } - - varl = VarList; - int k = 0; - double tmp, tmpx[dim]; - while (varl) // run along variables - { - f_find_maximum(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], tmp, tmpx, llb, uub); - if (tmp > shellf[k]) - { - shellf[k] = tmp; - for (int i = 0; i < dim; i++) - xx[dim * k + i] = tmpx[i]; - } - varl = varl->next; - k++; - } - } - - if (Bp == ble) - break; - Bp = Bp->next; - } - - struct mloc - { - double val; - int rank; - }; - - mloc *IN, *OUT; - IN = new mloc[num_var]; - OUT = new mloc[num_var]; - for (int i = 0; i < num_var; i++) - { - IN[i].val = shellf[i]; - IN[i].rank = myrank; - } - - MPI_Allreduce(IN, OUT, num_var, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD); - - for (int i = 0; i < num_var; i++) - { - Shellf[i] = OUT[i].val; - if (myrank != OUT[i].rank) - for (int k = 0; k < 3; k++) - xx[3 * i + k] = 0; - } - - MPI_Allreduce(xx, XX, dim * num_var, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - delete[] IN; - delete[] OUT; - delete[] shellf; - delete[] xx; - delete[] DH; - delete[] llb; - delete[] uub; -} -void Patch::Find_Maximum(MyList *VarList, double *XX, - double *Shellf, MPI_Comm Comm_here) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - double *shellf, *xx; - shellf = new double[num_var]; - xx = new double[dim * num_var]; - memset(shellf, 0, sizeof(double) * num_var); - 
memset(xx, 0, sizeof(double) * dim * num_var); - - double *DH; - int *llb, *uub; - DH = new double[dim]; - - for (int i = 0; i < dim; i++) - { - DH[i] = getdX(i); - } - - llb = new int[dim]; - uub = new int[dim]; - - MyList *Bp = blb; - while (Bp) // run along Blocks - { - Block *BP = Bp->data; - - if (myrank == BP->rank) - { - - for (int i = 0; i < dim; i++) - { - llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? lli[i] : ghost_width; - uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? uui[i] : ghost_width; - } - - varl = VarList; - int k = 0; - double tmp, tmpx[dim]; - while (varl) // run along variables - { - f_find_maximum(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], tmp, tmpx, llb, uub); - if (tmp > shellf[k]) - { - shellf[k] = tmp; - for (int i = 0; i < dim; i++) - xx[dim * k + i] = tmpx[i]; - } - varl = varl->next; - k++; - } - } - - if (Bp == ble) - break; - Bp = Bp->next; - } - - struct mloc - { - double val; - int rank; - }; - - mloc *IN, *OUT; - IN = new mloc[num_var]; - OUT = new mloc[num_var]; - for (int i = 0; i < num_var; i++) - { - IN[i].val = shellf[i]; - IN[i].rank = myrank; - } - - MPI_Allreduce(IN, OUT, num_var, MPI_DOUBLE_INT, MPI_MAXLOC, Comm_here); - - for (int i = 0; i < num_var; i++) - { - Shellf[i] = OUT[i].val; - if (myrank != OUT[i].rank) - for (int k = 0; k < 3; k++) - xx[3 * i + k] = 0; - } - - MPI_Allreduce(xx, XX, dim * num_var, MPI_DOUBLE, MPI_SUM, Comm_here); - - delete[] IN; - delete[] OUT; - delete[] shellf; - delete[] xx; - delete[] DH; - delete[] llb; - delete[] uub; -} -// if the given point locates in the present Patch return true -// otherwise return false -bool Patch::Find_Point(double *XX) -{ - double *DH; - DH = new double[dim]; - - for (int i = 0; i < dim; i++) - { - DH[i] = getdX(i); - } - - for (int i = 0; i < dim; i++) - { - // has excluded the buffer points - if (XX[i] < bbox[i] + lli[i] * DH[i] - DH[i] / 100 || XX[i] > bbox[dim + i] - uui[i] * DH[i] + DH[i] / 100) - { - delete[] 
DH; - return false; // out of current patch, - // remember to delete the allocated arrays before return!!! - } - } - - delete[] DH; - - return true; + +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +#include "misc.h" +#include "MPatch.h" +#include "Parallel.h" +#include "fmisc.h" + +Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi) +{ + + int hbuffer_width = buffer_width; + if (lev == 0) + hbuffer_width = CS_width; // specific for shell-box coulping + + if (DIM != dim) + { + cout << "dimension is not consistent in Patch construction" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + for (int i = 0; i < dim; i++) + { + shape[i] = shapei[i]; + bbox[i] = bboxi[i]; + bbox[dim + i] = bboxi[dim + i]; + lli[i] = uui[i] = 0; + if (buflog) + { + double DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH = (bbox[dim + i] - bbox[i]) / (shape[i] - 1); +#else +#ifdef Cell + DH = (bbox[dim + i] - bbox[i]) / shape[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + uui[i] = hbuffer_width; + bbox[dim + i] = bbox[dim + i] + uui[i] * DH; + shape[i] = shape[i] + uui[i]; + } + } + + if (buflog) + { + if (DIM != 3) + { + cout << "Symmetry in Patch construction only support 3 yet but dim = " << DIM << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + double tmpb, DH; + if (Symmetry > 0) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH = (bbox[5] - bbox[2]) / (shape[2] - 1); +#else +#ifdef Cell + DH = (bbox[5] - bbox[2]) / shape[2]; +#else +#error Not define Vertex nor Cell +#endif +#endif + tmpb = Mymax(0, bbox[2] - hbuffer_width * DH); + lli[2] = int((bbox[2] - tmpb) / DH + 0.4); + bbox[2] = bbox[2] - lli[2] * DH; + shape[2] = shape[2] + lli[2]; + if (lli[2] < hbuffer_width) + { + if (feq(bbox[2], 0, DH / 2)) + lli[2] = 0; + else + { + cout << "Code mistake for lli[2] = " << lli[2] << ", bbox[2] = " << 
bbox[2] << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + if (Symmetry > 1) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH = (bbox[3] - bbox[0]) / (shape[0] - 1); +#else +#ifdef Cell + DH = (bbox[3] - bbox[0]) / shape[0]; +#else +#error Not define Vertex nor Cell +#endif +#endif + tmpb = Mymax(0, bbox[0] - hbuffer_width * DH); + lli[0] = int((bbox[0] - tmpb) / DH + 0.4); + bbox[0] = bbox[0] - lli[0] * DH; + shape[0] = shape[0] + lli[0]; + if (lli[0] < hbuffer_width) + { + if (feq(bbox[0], 0, DH / 2)) + lli[0] = 0; + else + { + cout << "Code mistake for lli[0] = " << lli[0] << ", bbox[0] = " << bbox[0] << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH = (bbox[4] - bbox[1]) / (shape[1] - 1); +#else +#ifdef Cell + DH = (bbox[4] - bbox[1]) / shape[1]; +#else +#error Not define Vertex nor Cell +#endif +#endif + tmpb = Mymax(0, bbox[1] - hbuffer_width * DH); + lli[1] = int((bbox[1] - tmpb) / DH + 0.4); + bbox[1] = bbox[1] - lli[1] * DH; + shape[1] = shape[1] + lli[1]; + if (lli[1] < hbuffer_width) + { + if (feq(bbox[1], 0, DH / 2)) + lli[1] = 0; + else + { + cout << "Code mistake for lli[1] = " << lli[1] << ", bbox[1] = " << bbox[1] << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + } + else + { + for (int i = 0; i < 2; i++) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH = (bbox[dim + i] - bbox[i]) / (shape[i] - 1); +#else +#ifdef Cell + DH = (bbox[dim + i] - bbox[i]) / shape[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + lli[i] = hbuffer_width; + bbox[i] = bbox[i] - lli[i] * DH; + shape[i] = shape[i] + lli[i]; + } + } + } + else + { + for (int i = 0; i < dim; i++) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH = (bbox[dim + i] - bbox[i]) / (shape[i] - 1); +#else +#ifdef Cell + DH = (bbox[dim + i] - bbox[i]) / shape[i]; +#else +#error Not define Vertex nor 
Cell +#endif +#endif + lli[i] = hbuffer_width; + bbox[i] = bbox[i] - lli[i] * DH; + shape[i] = shape[i] + lli[i]; + } + } + } + + blb = ble = 0; +} +Patch::~Patch() +{ +} +// buflog 1: with buffer points; 0 without +void Patch::checkPatch(bool buflog) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) + { + if (buflog) + { + cout << " belong to level " << lev << endl; + cout << " shape: ["; + for (int i = 0; i < dim; i++) + { + cout << shape[i]; + if (i < dim - 1) + cout << ","; + else + cout << "]"; + } + cout << " resolution: ["; + for (int i = 0; i < dim; i++) + { + cout << getdX(i); + if (i < dim - 1) + cout << ","; + else + cout << "]" << endl; + } + cout << " range:" << "("; + for (int i = 0; i < dim; i++) + { + cout << bbox[i] << ":" << bbox[dim + i]; + if (i < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + } + else + { + cout << " belong to level " << lev << endl; + cout << " shape: ["; + for (int i = 0; i < dim; i++) + { + cout << shape[i] - lli[i] - uui[i]; + if (i < dim - 1) + cout << ","; + else + cout << "]"; + } + cout << " resolution: ["; + for (int i = 0; i < dim; i++) + { + cout << getdX(i); + if (i < dim - 1) + cout << ","; + else + cout << "]" << endl; + } + cout << " range:" << "("; + for (int i = 0; i < dim; i++) + { + cout << bbox[i] + lli[i] * getdX(i) << ":" << bbox[dim + i] - uui[i] * getdX(i); + if (i < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + } + } +} +void Patch::checkPatch(bool buflog, const int out_rank) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == out_rank) + { + cout << " out_rank = " << out_rank << endl; + if (buflog) + { + cout << " belong to level " << lev << endl; + cout << " shape: ["; + for (int i = 0; i < dim; i++) + { + cout << shape[i]; + if (i < dim - 1) + cout << ","; + else + cout << "]"; + } + cout << " resolution: ["; + for (int i = 0; i < dim; i++) + { + cout << getdX(i); + if (i < dim - 1) + cout << ","; + else + cout << "]" << 
endl; + } + cout << " range:" << "("; + for (int i = 0; i < dim; i++) + { + cout << bbox[i] << ":" << bbox[dim + i]; + if (i < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + } + else + { + cout << " belong to level " << lev << endl; + cout << " shape: ["; + for (int i = 0; i < dim; i++) + { + cout << shape[i] - lli[i] - uui[i]; + if (i < dim - 1) + cout << ","; + else + cout << "]"; + } + cout << " resolution: ["; + for (int i = 0; i < dim; i++) + { + cout << getdX(i); + if (i < dim - 1) + cout << ","; + else + cout << "]" << endl; + } + cout << " range:" << "("; + for (int i = 0; i < dim; i++) + { + cout << bbox[i] + lli[i] * getdX(i) << ":" << bbox[dim + i] - uui[i] * getdX(i); + if (i < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + } + } +} +void Patch::Interp_Points(MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry) +{ + // NOTE: we do not Synchnize variables here, make sure of that before calling this routine + int myrank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + int ordn = 2 * ghost_width; + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + memset(Shellf, 0, sizeof(double) * NN * num_var); + + // owner_rank[j] records which MPI rank owns point j + // All ranks traverse the same block list so they all agree on ownership + int *owner_rank; + owner_rank = new int[NN]; + for (int j = 0; j < NN; j++) + owner_rank[j] = -1; + + double DH[dim], llb[dim], uub[dim]; + for (int i = 0; i < dim; i++) + DH[i] = getdX(i); + + for (int j = 0; j < NN; j++) // run along points + { + double pox[dim]; + for (int i = 0; i < dim; i++) + { + pox[i] = XX[i][j]; + if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) + { + cout << "Patch::Interp_Points: point ("; + for (int k = 0; k < dim; k++) + { + cout << XX[k][j]; + if (k < dim - 1) + cout << ","; + else + cout << ") is 
out of current Patch." << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + MyList *Bp = blb; + bool notfind = true; + while (notfind && Bp) // run along Blocks + { + Block *BP = Bp->data; + + bool flag = true; + for (int i = 0; i < dim; i++) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; +#else +#ifdef Cell + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2) + { + flag = false; + break; + } + } + + if (flag) + { + notfind = false; + owner_rank[j] = BP->rank; + if (myrank == BP->rank) + { + //---> interpolation + varl = VarList; + int k = 0; + while (varl) // run along variables + { + f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], + pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); + varl = varl->next; + k++; + } + } + } + if (Bp == ble) + break; + Bp = Bp->next; + } + } + // Replace MPI_Allreduce with per-owner MPI_Bcast: + // Group consecutive points by owner rank and broadcast each group. + // Since each point's data is non-zero only on the owner rank, + // Bcast from owner is equivalent to Allreduce(MPI_SUM) but much cheaper. 
+ { + int j = 0; + while (j < NN) + { + int cur_owner = owner_rank[j]; + if (cur_owner < 0) + { + if (myrank == 0) + { + cout << "ERROR: Patch::Interp_Points fails to find point ("; + for (int d = 0; d < dim; d++) + { + cout << XX[d][j]; + if (d < dim - 1) + cout << ","; + else + cout << ")"; + } + cout << " on Patch ("; + for (int d = 0; d < dim; d++) + { + cout << bbox[d] << "+" << lli[d] * DH[d]; + if (d < dim - 1) + cout << ","; + else + cout << ")--"; + } + cout << "("; + for (int d = 0; d < dim; d++) + { + cout << bbox[dim + d] << "-" << uui[d] * DH[d]; + if (d < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + j++; + continue; + } + // Find contiguous run of points with the same owner + int jstart = j; + while (j < NN && owner_rank[j] == cur_owner) + j++; + int count = (j - jstart) * num_var; + MPI_Bcast(Shellf + jstart * num_var, count, MPI_DOUBLE, cur_owner, MPI_COMM_WORLD); + } + } + + delete[] owner_rank; +} +void Patch::Interp_Points(MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry, + int Nmin_consumer, int Nmax_consumer) +{ + // Targeted point-to-point overload: each owner sends each point only to + // the one rank that needs it for integration (consumer), reducing + // communication volume by ~nprocs times compared to the Bcast version. 
+ /* + double t_calc_end, t_calc_total = 0; + double t_calc_start = MPI_Wtime();*/ + int myrank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + int ordn = 2 * ghost_width; + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + memset(Shellf, 0, sizeof(double) * NN * num_var); + + // owner_rank[j] records which MPI rank owns point j + int *owner_rank; + owner_rank = new int[NN]; + for (int j = 0; j < NN; j++) + owner_rank[j] = -1; + + double DH[dim], llb[dim], uub[dim]; + for (int i = 0; i < dim; i++) + DH[i] = getdX(i); + + // --- Interpolation phase (identical to original) --- + for (int j = 0; j < NN; j++) + { + double pox[dim]; + for (int i = 0; i < dim; i++) + { + pox[i] = XX[i][j]; + if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) + { + cout << "Patch::Interp_Points: point ("; + for (int k = 0; k < dim; k++) + { + cout << XX[k][j]; + if (k < dim - 1) + cout << ","; + else + cout << ") is out of current Patch." << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + MyList *Bp = blb; + bool notfind = true; + while (notfind && Bp) + { + Block *BP = Bp->data; + + bool flag = true; + for (int i = 0; i < dim; i++) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; +#else +#ifdef Cell + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? 
BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2) + { + flag = false; + break; + } + } + + if (flag) + { + notfind = false; + owner_rank[j] = BP->rank; + if (myrank == BP->rank) + { + varl = VarList; + int k = 0; + while (varl) + { + f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], + pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); + varl = varl->next; + k++; + } + } + } + if (Bp == ble) + break; + Bp = Bp->next; + } + } + /* + t_calc_end = MPI_Wtime(); + t_calc_total = t_calc_end - t_calc_start;*/ + // --- Error check for unfound points --- + for (int j = 0; j < NN; j++) + { + if (owner_rank[j] < 0 && myrank == 0) + { + cout << "ERROR: Patch::Interp_Points fails to find point ("; + for (int d = 0; d < dim; d++) + { + cout << XX[d][j]; + if (d < dim - 1) + cout << ","; + else + cout << ")"; + } + cout << " on Patch ("; + for (int d = 0; d < dim; d++) + { + cout << bbox[d] << "+" << lli[d] * DH[d]; + if (d < dim - 1) + cout << ","; + else + cout << ")--"; + } + cout << "("; + for (int d = 0; d < dim; d++) + { + cout << bbox[dim + d] << "-" << uui[d] * DH[d]; + if (d < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + // --- Targeted point-to-point communication phase --- + // Compute consumer_rank[j] using the same deterministic formula as surface_integral + int *consumer_rank = new int[NN]; + { + int mp = NN / nprocs; + int Lp = NN - nprocs * mp; + for (int j = 0; j < NN; j++) + { + if (j < Lp * (mp + 1)) + consumer_rank[j] = j / (mp + 1); + else + consumer_rank[j] = Lp + (j - Lp * (mp + 1)) / mp; + } + } + + // Count sends and recvs per rank + int *send_count = new int[nprocs]; + int *recv_count = new int[nprocs]; + memset(send_count, 0, sizeof(int) * nprocs); + memset(recv_count, 0, 
sizeof(int) * nprocs); + + for (int j = 0; j < NN; j++) + { + int own = owner_rank[j]; + int con = consumer_rank[j]; + if (own == con) + continue; // local — no communication needed + if (own == myrank) + send_count[con]++; + if (con == myrank) + recv_count[own]++; + } + + // Build send buffers: for each destination rank, pack (index, data) pairs + // Each entry: 1 int (point index j) + num_var doubles + int total_send = 0, total_recv = 0; + int *send_offset = new int[nprocs]; + int *recv_offset = new int[nprocs]; + for (int r = 0; r < nprocs; r++) + { + send_offset[r] = total_send; + total_send += send_count[r]; + recv_offset[r] = total_recv; + total_recv += recv_count[r]; + } + + // Pack send buffers: each message contains (j, data[0..num_var-1]) per point + int stride = 1 + num_var; // 1 double for index + num_var doubles for data + double *sendbuf = new double[total_send * stride]; + double *recvbuf = new double[total_recv * stride]; + + // Temporary counters for packing + int *pack_pos = new int[nprocs]; + memset(pack_pos, 0, sizeof(int) * nprocs); + + for (int j = 0; j < NN; j++) + { + int own = owner_rank[j]; + int con = consumer_rank[j]; + if (own != myrank || con == myrank) + continue; + int pos = (send_offset[con] + pack_pos[con]) * stride; + sendbuf[pos] = (double)j; // point index + for (int v = 0; v < num_var; v++) + sendbuf[pos + 1 + v] = Shellf[j * num_var + v]; + pack_pos[con]++; + } + + // Post non-blocking recvs and sends + int n_req = 0; + for (int r = 0; r < nprocs; r++) + { + if (recv_count[r] > 0) n_req++; + if (send_count[r] > 0) n_req++; + } + + MPI_Request *reqs = new MPI_Request[n_req]; + int req_idx = 0; + + for (int r = 0; r < nprocs; r++) + { + if (recv_count[r] > 0) + { + MPI_Irecv(recvbuf + recv_offset[r] * stride, + recv_count[r] * stride, MPI_DOUBLE, + r, 0, MPI_COMM_WORLD, &reqs[req_idx++]); + } + } + for (int r = 0; r < nprocs; r++) + { + if (send_count[r] > 0) + { + MPI_Isend(sendbuf + send_offset[r] * stride, + send_count[r] * 
stride, MPI_DOUBLE, + r, 0, MPI_COMM_WORLD, &reqs[req_idx++]); + } + } + + if (n_req > 0) + MPI_Waitall(n_req, reqs, MPI_STATUSES_IGNORE); + + // Unpack recv buffers into Shellf + for (int i = 0; i < total_recv; i++) + { + int pos = i * stride; + int j = (int)recvbuf[pos]; + for (int v = 0; v < num_var; v++) + Shellf[j * num_var + v] = recvbuf[pos + 1 + v]; + } + + delete[] reqs; + delete[] sendbuf; + delete[] recvbuf; + delete[] pack_pos; + delete[] send_offset; + delete[] recv_offset; + delete[] send_count; + delete[] recv_count; + delete[] consumer_rank; + delete[] owner_rank; + /* + // 4. 汇总并输出真正干活最慢的 Top 4 + struct RankStats { + int rank; + double calc_time; // 净计算时间 + }; + + // 创建当前进程的统计数据 + RankStats local_stat; + local_stat.rank = myrank; + local_stat.calc_time = t_calc_total; + + // 为所有进程的统计数据分配内存 + RankStats *all_stats = nullptr; + if (myrank == 0) { + all_stats = new RankStats[nprocs]; + } + + // 使用MPI_Gather收集所有进程的数据到rank 0 + MPI_Gather(&local_stat, sizeof(RankStats), MPI_BYTE, + all_stats, sizeof(RankStats), MPI_BYTE, + 0, MPI_COMM_WORLD); + + // 准备输出前4个rank的信息(所有rank都参与,确保广播后一致) + int top10_ranks[10] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; + double top10_times[10] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + int num_top10 = 0; + + if (myrank == 0) { + // 按 calc_time(净计算时间)排序 + std::sort(all_stats, all_stats + nprocs, [](const RankStats& a, const RankStats& b) { + return a.calc_time > b.calc_time; + }); + + // 取前4个 + num_top10 = (nprocs < 10) ? 
nprocs : 10; + for (int i = 0; i < num_top10; i++) { + top10_ranks[i] = all_stats[i].rank; + top10_times[i] = all_stats[i].calc_time; + } + + printf("\n--- Top %d Ranks by ACTIVE COMPUTATION (CPU Time) ---\n", num_top10); + for (int i = 0; i < num_top10; i++) { + printf("Rank [%4d]: Calc %.6f s\n", top10_ranks[i], top10_times[i]); + } + + // 清理分配的内存 + delete[] all_stats; + } + + // 广播前4个rank的信息给所有进程 + MPI_Bcast(&num_top10, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (num_top10 > 0) { + MPI_Bcast(top10_ranks, 10, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(top10_times, 10, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } +*/ +} +void Patch::Interp_Points(MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry, MPI_Comm Comm_here) +{ + // NOTE: we do not Synchnize variables here, make sure of that before calling this routine + int myrank, lmyrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_rank(Comm_here, &lmyrank); + + int ordn = 2 * ghost_width; + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + memset(Shellf, 0, sizeof(double) * NN * num_var); + + // owner_rank[j] stores the global rank that owns point j + int *owner_rank; + owner_rank = new int[NN]; + for (int j = 0; j < NN; j++) + owner_rank[j] = -1; + + // Build global-to-local rank translation for Comm_here + MPI_Group world_group, local_group; + MPI_Comm_group(MPI_COMM_WORLD, &world_group); + MPI_Comm_group(Comm_here, &local_group); + + double DH[dim], llb[dim], uub[dim]; + for (int i = 0; i < dim; i++) + DH[i] = getdX(i); + + for (int j = 0; j < NN; j++) // run along points + { + double pox[dim]; + for (int i = 0; i < dim; i++) + { + pox[i] = XX[i][j]; + if (lmyrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) + { + cout << "Patch::Interp_Points: point ("; + for (int k = 0; k < dim; k++) + { + cout << XX[k][j]; + if (k < dim - 1) + cout << ","; + else + cout << ") is out of current Patch." 
<< endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + MyList *Bp = blb; + bool notfind = true; + while (notfind && Bp) // run along Blocks + { + Block *BP = Bp->data; + + bool flag = true; + for (int i = 0; i < dim; i++) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; +#else +#ifdef Cell + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2) + { + flag = false; + break; + } + } + + if (flag) + { + notfind = false; + owner_rank[j] = BP->rank; + if (myrank == BP->rank) + { + //---> interpolation + varl = VarList; + int k = 0; + while (varl) // run along variables + { + f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], + pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); + varl = varl->next; + k++; + } + } + } + if (Bp == ble) + break; + Bp = Bp->next; + } + } + + // Collect unique global owner ranks and translate to local ranks in Comm_here + // Then broadcast each owner's points via MPI_Bcast on Comm_here + { + int j = 0; + while (j < NN) + { + int cur_owner_global = owner_rank[j]; + if (cur_owner_global < 0) + { + // Point not found — skip (error check disabled for sub-communicator levels) + j++; + continue; + } + // Translate global rank to local rank in Comm_here + int cur_owner_local; + MPI_Group_translate_ranks(world_group, 1, &cur_owner_global, local_group, 
&cur_owner_local); + + // Find contiguous run of points with the same owner + int jstart = j; + while (j < NN && owner_rank[j] == cur_owner_global) + j++; + int count = (j - jstart) * num_var; + MPI_Bcast(Shellf + jstart * num_var, count, MPI_DOUBLE, cur_owner_local, Comm_here); + } + } + + MPI_Group_free(&world_group); + MPI_Group_free(&local_group); + delete[] owner_rank; +} +void Patch::checkBlock() +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) + { + MyList *BP = blb; + while (BP) + { + BP->data->checkBlock(); + if (BP == ble) + break; + BP = BP->next; + } + } +} +double Patch::getdX(int dir) +{ + if (dir < 0 || dir >= dim) + { + cout << "Patch::getdX: error input dir = " << dir << ", this Patch has direction (0," << dim - 1 << ")" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + double h; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + if (shape[dir] == 1) + { + cout << "Patch::getdX: for direction " << dir << ", this Patch has only one point. Can not determine dX for vertex center grid." 
<< endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + h = (bbox[dim + dir] - bbox[dir]) / (shape[dir] - 1); +#else +#ifdef Cell + h = (bbox[dim + dir] - bbox[dir]) / shape[dir]; +#else +#error Not define Vertex nor Cell +#endif +#endif + return h; +} +bool Patch::Interp_ONE_Point(MyList *VarList, double *XX, + double *Shellf, int Symmetry) +{ + // NOTE: we do not Synchnize variables here, make sure of that before calling this routine + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int ordn = 2 * ghost_width; + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + double *shellf; + shellf = new double[num_var]; + memset(shellf, 0, sizeof(double) * num_var); + + double *DH, *llb, *uub; + DH = new double[dim]; + + for (int i = 0; i < dim; i++) + { + DH[i] = getdX(i); + } + llb = new double[dim]; + uub = new double[dim]; + + double pox[dim]; + for (int i = 0; i < dim; i++) + { + pox[i] = XX[i]; + // has excluded the buffer points + if (XX[i] < bbox[i] + lli[i] * DH[i] - DH[i] / 100 || XX[i] > bbox[dim + i] - uui[i] * DH[i] + DH[i] / 100) + { + delete[] shellf; + delete[] DH; + delete[] llb; + delete[] uub; + return false; // out of current patch, + // remember to delete the allocated arrays before return!!! + } + } + + MyList *Bp = blb; + bool notfind = true; + while (notfind && Bp) // run along Blocks + { + Block *BP = Bp->data; + + bool flag = true; + for (int i = 0; i < dim; i++) + { +// NOTE: our dividing structure is (exclude ghost) +// -1 0 +// 1 2 +// so (0,1) does not belong to any part for vertex structure +// here we put (0,0.5) to left part and (0.5,1) to right part +// BUT for cell structure the bbox is (-1.5,0.5) and (0.5,2.5), there is no missing region at all +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? 
BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; +#else +#ifdef Cell + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (XX[i] - llb[i] < -DH[i] / 2 || XX[i] - uub[i] > DH[i] / 2) + { + flag = false; + break; + } + } + + if (flag) + { + notfind = false; + if (myrank == BP->rank) + { +// test old code +#if 0 +#define floorint(a) ((a) < 0 ? int(a) - 1 : int(a)) +//---> interpolation + int ixl,iyl,izl,ixu,iyu,izu; + double Delx,Dely,Delz; + + ixl = 1+floorint((pox[0]-BP->X[0][0])/DH[0]); + iyl = 1+floorint((pox[1]-BP->X[1][0])/DH[1]); + izl = 1+floorint((pox[2]-BP->X[2][0])/DH[2]); + + int nn=ordn/2; + + ixl = ixl-nn; + iyl = iyl-nn; + izl = izl-nn; + + int tmi; + tmi = (Symmetry==2)?-1:0; + if(ixl0)?-1:0; + if(izlBP->shape[0]) ixl=BP->shape[0]-ordn; + if(iyl+ordn>BP->shape[1]) iyl=BP->shape[1]-ordn; + if(izl+ordn>BP->shape[2]) izl=BP->shape[2]-ordn; +// support cell center + if(ixl>=0) Delx = ( pox[0] - BP->X[0][ixl] )/ DH[0]; + else Delx = ( pox[0] + BP->X[0][0] )/ DH[0]; + if(iyl>=0) Dely = ( pox[1] - BP->X[1][iyl] )/ DH[1]; + else Dely = ( pox[1] + BP->X[1][0] )/ DH[1]; + if(izl>=0) Delz = ( pox[2] - BP->X[2][izl] )/ DH[2]; + else Delz = ( pox[2] + BP->X[2][0] )/ DH[2]; +//change to fortran index + ixl++; + iyl++; + izl++; + ixu = ixl + ordn - 1; + iyu = iyl + ordn - 1; + izu = izl + ordn - 1; + varl=VarList; + int j=0; + while(varl) + { + f_interp_2(BP->shape,BP->fgfs[varl->data->sgfn],shellf[j],ixl,ixu,iyl,iyu,izl,izu,Delx,Dely,Delz, + ordn,varl->data->SoA,Symmetry); + varl=varl->next; + j++; + } //varl +#else + //---> 
interpolation + varl = VarList; + int k = 0; + while (varl) // run along variables + { + // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], + // pox,ordn,varl->data->SoA,Symmetry); + f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k], + pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); + varl = varl->next; + k++; + } +#endif + } + } + if (Bp == ble) + break; + Bp = Bp->next; + } + + if (notfind && myrank == 0) + { + cout << "ERROR: Patch::Interp_Points fails to find point ("; + for (int j = 0; j < dim; j++) + { + cout << XX[j]; + if (j < dim - 1) + cout << ","; + else + cout << ")"; + } + cout << " on Patch ("; + for (int j = 0; j < dim; j++) + { + cout << bbox[j] << "+" << lli[j] * getdX(j); + if (j < dim - 1) + cout << ","; + else + cout << ")--"; + } + cout << "("; + for (int j = 0; j < dim; j++) + { + cout << bbox[dim + j] << "-" << uui[j] * getdX(j); + if (j < dim - 1) + cout << ","; + else + cout << ")" << endl; + } +#if 0 + checkBlock(); +#else + cout << "splited domains:" << endl; + { + MyList *Bp = blb; + while (Bp) + { + Block *BP = Bp->data; + + for (int i = 0; i < dim; i++) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; +#else +#ifdef Cell + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? 
BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + } + cout << "("; + for (int j = 0; j < dim; j++) + { + cout << llb[j] << ":" << uub[j]; + if (j < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + if (Bp == ble) + break; + Bp = Bp->next; + } + } +#endif + MPI_Abort(MPI_COMM_WORLD, 1); + } + + MPI_Allreduce(shellf, Shellf, num_var, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + delete[] shellf; + delete[] DH; + delete[] llb; + delete[] uub; + + return true; +} +bool Patch::Interp_ONE_Point(MyList *VarList, double *XX, + double *Shellf, int Symmetry, MPI_Comm Comm_here) +{ + // NOTE: we do not Synchnize variables here, make sure of that before calling this routine + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int ordn = 2 * ghost_width; + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + double *shellf; + shellf = new double[num_var]; + memset(shellf, 0, sizeof(double) * num_var); + + double *DH, *llb, *uub; + DH = new double[dim]; + + for (int i = 0; i < dim; i++) + { + DH[i] = getdX(i); + } + llb = new double[dim]; + uub = new double[dim]; + + double pox[dim]; + for (int i = 0; i < dim; i++) + { + pox[i] = XX[i]; + // has excluded the buffer points + if (XX[i] < bbox[i] + lli[i] * DH[i] - DH[i] / 100 || XX[i] > bbox[dim + i] - uui[i] * DH[i] + DH[i] / 100) + { + delete[] shellf; + delete[] DH; + delete[] llb; + delete[] uub; + return false; // out of current patch, + // remember to delete the allocated arrays before return!!! 
+ } + } + + MyList *Bp = blb; + bool notfind = true; + while (notfind && Bp) // run along Blocks + { + Block *BP = Bp->data; + + bool flag = true; + for (int i = 0; i < dim; i++) + { +// NOTE: our dividing structure is (exclude ghost) +// -1 0 +// 1 2 +// so (0,1) does not belong to any part for vertex structure +// here we put (0,0.5) to left part and (0.5,1) to right part +// BUT for cell structure the bbox is (-1.5,0.5) and (0.5,2.5), there is no missing region at all +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; +#else +#ifdef Cell + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (XX[i] - llb[i] < -DH[i] / 2 || XX[i] - uub[i] > DH[i] / 2) + { + flag = false; + break; + } + } + + if (flag) + { + notfind = false; + if (myrank == BP->rank) + { +// test old code +#if 0 +#define floorint(a) ((a) < 0 ? 
int(a) - 1 : int(a)) +//---> interpolation + int ixl,iyl,izl,ixu,iyu,izu; + double Delx,Dely,Delz; + + ixl = 1+floorint((pox[0]-BP->X[0][0])/DH[0]); + iyl = 1+floorint((pox[1]-BP->X[1][0])/DH[1]); + izl = 1+floorint((pox[2]-BP->X[2][0])/DH[2]); + + int nn=ordn/2; + + ixl = ixl-nn; + iyl = iyl-nn; + izl = izl-nn; + + int tmi; + tmi = (Symmetry==2)?-1:0; + if(ixl0)?-1:0; + if(izlBP->shape[0]) ixl=BP->shape[0]-ordn; + if(iyl+ordn>BP->shape[1]) iyl=BP->shape[1]-ordn; + if(izl+ordn>BP->shape[2]) izl=BP->shape[2]-ordn; +// support cell center + if(ixl>=0) Delx = ( pox[0] - BP->X[0][ixl] )/ DH[0]; + else Delx = ( pox[0] + BP->X[0][0] )/ DH[0]; + if(iyl>=0) Dely = ( pox[1] - BP->X[1][iyl] )/ DH[1]; + else Dely = ( pox[1] + BP->X[1][0] )/ DH[1]; + if(izl>=0) Delz = ( pox[2] - BP->X[2][izl] )/ DH[2]; + else Delz = ( pox[2] + BP->X[2][0] )/ DH[2]; +//change to fortran index + ixl++; + iyl++; + izl++; + ixu = ixl + ordn - 1; + iyu = iyl + ordn - 1; + izu = izl + ordn - 1; + varl=VarList; + int j=0; + while(varl) + { + f_interp_2(BP->shape,BP->fgfs[varl->data->sgfn],shellf[j],ixl,ixu,iyl,iyu,izl,izu,Delx,Dely,Delz, + ordn,varl->data->SoA,Symmetry); + varl=varl->next; + j++; + } //varl +#else + //---> interpolation + varl = VarList; + int k = 0; + while (varl) // run along variables + { + // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], + // pox,ordn,varl->data->SoA,Symmetry); + f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k], + pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); + varl = varl->next; + k++; + } +#endif + } + } + if (Bp == ble) + break; + Bp = Bp->next; + } + + if (notfind && myrank == 0) + { + cout << "ERROR: Patch::Interp_Points fails to find point ("; + for (int j = 0; j < dim; j++) + { + cout << XX[j]; + if (j < dim - 1) + cout << ","; + else + cout << ")"; + } + cout << " on Patch ("; + for (int j = 0; j < dim; j++) + { + cout << bbox[j] << "+" << lli[j] * 
getdX(j); + if (j < dim - 1) + cout << ","; + else + cout << ")--"; + } + cout << "("; + for (int j = 0; j < dim; j++) + { + cout << bbox[dim + j] << "-" << uui[j] * getdX(j); + if (j < dim - 1) + cout << ","; + else + cout << ")" << endl; + } +#if 0 + checkBlock(); +#else + cout << "splited domains:" << endl; + { + MyList *Bp = blb; + while (Bp) + { + Block *BP = Bp->data; + + for (int i = 0; i < dim; i++) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i]; +#else +#ifdef Cell + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i]; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + } + cout << "("; + for (int j = 0; j < dim; j++) + { + cout << llb[j] << ":" << uub[j]; + if (j < dim - 1) + cout << ","; + else + cout << ")" << endl; + } + if (Bp == ble) + break; + Bp = Bp->next; + } + } +#endif + MPI_Abort(MPI_COMM_WORLD, 1); + } + + MPI_Allreduce(shellf, Shellf, num_var, MPI_DOUBLE, MPI_SUM, Comm_here); + + delete[] shellf; + delete[] DH; + delete[] llb; + delete[] uub; + + return true; +} +// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs +void Patch::Find_Maximum(MyList *VarList, double *XX, + double *Shellf) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + double *shellf, *xx; + shellf = new double[num_var]; + xx = new double[dim * num_var]; + memset(shellf, 0, sizeof(double) * 
num_var); + memset(xx, 0, sizeof(double) * dim * num_var); + + double *DH; + int *llb, *uub; + DH = new double[dim]; + + for (int i = 0; i < dim; i++) + { + DH[i] = getdX(i); + } + + llb = new int[dim]; + uub = new int[dim]; + + MyList *Bp = blb; + while (Bp) // run along Blocks + { + Block *BP = Bp->data; + + if (myrank == BP->rank) + { + + for (int i = 0; i < dim; i++) + { + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? lli[i] : ghost_width; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? uui[i] : ghost_width; + } + + varl = VarList; + int k = 0; + double tmp, tmpx[dim]; + while (varl) // run along variables + { + f_find_maximum(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], tmp, tmpx, llb, uub); + if (tmp > shellf[k]) + { + shellf[k] = tmp; + for (int i = 0; i < dim; i++) + xx[dim * k + i] = tmpx[i]; + } + varl = varl->next; + k++; + } + } + + if (Bp == ble) + break; + Bp = Bp->next; + } + + struct mloc + { + double val; + int rank; + }; + + mloc *IN, *OUT; + IN = new mloc[num_var]; + OUT = new mloc[num_var]; + for (int i = 0; i < num_var; i++) + { + IN[i].val = shellf[i]; + IN[i].rank = myrank; + } + + MPI_Allreduce(IN, OUT, num_var, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD); + + for (int i = 0; i < num_var; i++) + { + Shellf[i] = OUT[i].val; + if (myrank != OUT[i].rank) + for (int k = 0; k < 3; k++) + xx[3 * i + k] = 0; + } + + MPI_Allreduce(xx, XX, dim * num_var, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + delete[] IN; + delete[] OUT; + delete[] shellf; + delete[] xx; + delete[] DH; + delete[] llb; + delete[] uub; +} +void Patch::Find_Maximum(MyList *VarList, double *XX, + double *Shellf, MPI_Comm Comm_here) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + double *shellf, *xx; + shellf = new double[num_var]; + xx = new double[dim * num_var]; + memset(shellf, 0, sizeof(double) * num_var); + 
memset(xx, 0, sizeof(double) * dim * num_var); + + double *DH; + int *llb, *uub; + DH = new double[dim]; + + for (int i = 0; i < dim; i++) + { + DH[i] = getdX(i); + } + + llb = new int[dim]; + uub = new int[dim]; + + MyList *Bp = blb; + while (Bp) // run along Blocks + { + Block *BP = Bp->data; + + if (myrank == BP->rank) + { + + for (int i = 0; i < dim; i++) + { + llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? lli[i] : ghost_width; + uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? uui[i] : ghost_width; + } + + varl = VarList; + int k = 0; + double tmp, tmpx[dim]; + while (varl) // run along variables + { + f_find_maximum(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], tmp, tmpx, llb, uub); + if (tmp > shellf[k]) + { + shellf[k] = tmp; + for (int i = 0; i < dim; i++) + xx[dim * k + i] = tmpx[i]; + } + varl = varl->next; + k++; + } + } + + if (Bp == ble) + break; + Bp = Bp->next; + } + + struct mloc + { + double val; + int rank; + }; + + mloc *IN, *OUT; + IN = new mloc[num_var]; + OUT = new mloc[num_var]; + for (int i = 0; i < num_var; i++) + { + IN[i].val = shellf[i]; + IN[i].rank = myrank; + } + + MPI_Allreduce(IN, OUT, num_var, MPI_DOUBLE_INT, MPI_MAXLOC, Comm_here); + + for (int i = 0; i < num_var; i++) + { + Shellf[i] = OUT[i].val; + if (myrank != OUT[i].rank) + for (int k = 0; k < 3; k++) + xx[3 * i + k] = 0; + } + + MPI_Allreduce(xx, XX, dim * num_var, MPI_DOUBLE, MPI_SUM, Comm_here); + + delete[] IN; + delete[] OUT; + delete[] shellf; + delete[] xx; + delete[] DH; + delete[] llb; + delete[] uub; +} +// if the given point locates in the present Patch return true +// otherwise return false +bool Patch::Find_Point(double *XX) +{ + double *DH; + DH = new double[dim]; + + for (int i = 0; i < dim; i++) + { + DH[i] = getdX(i); + } + + for (int i = 0; i < dim; i++) + { + // has excluded the buffer points + if (XX[i] < bbox[i] + lli[i] * DH[i] - DH[i] / 100 || XX[i] > bbox[dim + i] - uui[i] * DH[i] + DH[i] / 100) + { + delete[] 
DH; + return false; // out of current patch, + // remember to delete the allocated arrays before return!!! + } + } + + delete[] DH; + + return true; } \ No newline at end of file diff --git a/AMSS_NCKU_source/NullShellPatch.h b/AMSS_NCKU_source/NullShellPatch.h index 26ff030..bad52b3 100644 --- a/AMSS_NCKU_source/NullShellPatch.h +++ b/AMSS_NCKU_source/NullShellPatch.h @@ -24,6 +24,7 @@ using namespace std; #endif #include +#include #include "MyList.h" #include "Block.h" #include "Parallel.h" diff --git a/AMSS_NCKU_source/Parallel.C b/AMSS_NCKU_source/Parallel.C index a9fb3cd..20d70f2 100644 --- a/AMSS_NCKU_source/Parallel.C +++ b/AMSS_NCKU_source/Parallel.C @@ -1,6484 +1,7128 @@ - -#include "Parallel.h" -#include "fmisc.h" -#include "prolongrestrict.h" -#include "misc.h" -#include "parameters.h" - -int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion -{ - nx = Mymax(1, shape / min_width); - nx = Mymin(cpusize, nx); - - return nx; -} -int Parallel::partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape) // special for 2 diemnsions -{ -#define SEARCH_SIZE 5 - int i, j, nx, ny; - int maxnx, maxny; - int mnx, mny; - int dn, hmin_width, cmin_width; - int cnx, cny; - double fx, fy; - int block_size; - int n; - - block_size = shape[0] * shape[1]; - n = Mymax(1, (block_size + split_size / 2) / split_size); - - maxnx = Mymax(1, shape[0] / min_width[0]); - maxnx = Mymin(cpusize, maxnx); - maxny = Mymax(1, shape[1] / min_width[1]); - maxny = Mymin(cpusize, maxny); - fx = (double)shape[0] / (shape[0] + shape[1]); - fy = (double)shape[1] / (shape[0] + shape[1]); - nx = mnx = Mymax(1, Mymin(maxnx, (int)(sqrt(double(n)) * fx / fy))); - ny = mny = Mymax(1, Mymin(maxny, (int)(sqrt(double(n)) * fy / fx))); - dn = abs(n - nx * ny); - hmin_width = Mymin(shape[0] / nx, shape[1] / ny); - for (cny = Mymax(1, mny - SEARCH_SIZE); cny <= (Mymin(mny + SEARCH_SIZE, maxny)); cny++) - for (cnx = Mymax(1, mnx - 
SEARCH_SIZE); cnx <= (Mymin(mnx + SEARCH_SIZE, maxnx)); cnx++) - { - cmin_width = Mymin(shape[0] / cnx, shape[1] / cny); - if (dn > abs(n - cnx * cny) || (dn == abs(n - cnx * cny) && cmin_width > hmin_width)) - { - dn = abs(n - cnx * cny); - nx = cnx; - ny = cny; - hmin_width = cmin_width; - } - } - - nxy[0] = nx; - nxy[1] = ny; - - return nx * ny; -#undef SEARCH_SIZE -} -int Parallel::partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape) // special for 3 diemnsions -#if 1 // algrithsm from Pretorius -{ -// cout< abs(n - cnx * cny * cnz) || (dn == abs(n - cnx * cny * cnz) && cmin_width > hmin_width)) - { - dn = abs(n - cnx * cny * cnz); - nx = cnx; - ny = cny; - nz = cnz; - hmin_width = cmin_width; - } - } - - nxyz[0] = nx; - nxyz[1] = ny; - nxyz[2] = nz; - - return nx * ny * nz; -#undef SEARCH_SIZE -} -#elif 1 // Zhihui's idea one on 2013-09-25 -{ - int nx, ny, nz; - int hmin_width; - hmin_width = Mymin(min_width[0], min_width[1]); - hmin_width = Mymin(hmin_width, min_width[2]); - nx = shape[0] / hmin_width; - if (nx * hmin_width < shape[0]) - nx++; - ny = shape[1] / hmin_width; - if (ny * hmin_width < shape[1]) - ny++; - nz = shape[2] / hmin_width; - if (nz * hmin_width < shape[2]) - nz++; - while (nx * ny * nz > cpusize) - { - hmin_width++; - nx = shape[0] / hmin_width; - if (nx * hmin_width < shape[0]) - nx++; - ny = shape[1] / hmin_width; - if (ny * hmin_width < shape[1]) - ny++; - nz = shape[2] / hmin_width; - if (nz * hmin_width < shape[2]) - nz++; - } - - nxyz[0] = nx; - nxyz[1] = ny; - nxyz[2] = nz; - - return nx * ny * nz; -} -#elif 1 // Zhihui's idea two on 2013-09-25 -{ - int nx, ny, nz; - const int hmin_width = 8; // for example we use 8 - nx = shape[0] / hmin_width; - if (nx * hmin_width < shape[0]) - nx++; - ny = shape[1] / hmin_width; - if (ny * hmin_width < shape[1]) - ny++; - nz = shape[2] / hmin_width; - if (nz * hmin_width < shape[2]) - nz++; - - nxyz[0] = nx; - nxyz[1] = ny; - nxyz[2] = nz; - - return nx * ny * nz; -} 
-#endif -// distribute the data to cprocessors -#if (PSTR == 0) -MyList *Parallel::distribute(MyList *PatchLIST, int cpusize, int ingfsi, int fngfsi, - bool periodic, int nodes) -{ -#ifdef USE_GPU_DIVIDE - double cpu_part, gpu_part; - map::iterator iter; - iter = parameters::dou_par.find("cpu part"); - if (iter != parameters::dou_par.end()) - { - cpu_part = iter->second; - } - else - { - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - // read parameter from file - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - ifstream inf(pname, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "Can not open parameter file " << pname << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - cout << "error reading parameter file " << pname << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "ABE") - { - if (skey == "cpu part") - cpu_part = atof(sval.c_str()); - } - } - inf.close(); - - parameters::dou_par.insert(map::value_type("cpu part", cpu_part)); - } - iter = parameters::dou_par.find("gpu part"); - if (iter != parameters::dou_par.end()) - { - gpu_part = iter->second; - } - else - { - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - // read parameter from file - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << 
endl; - exit(0); - } - } - ifstream inf(pname, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "Can not open parameter file " << pname << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - cout << "error reading parameter file " << pname << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "ABE") - { - if (skey == "gpu part") - gpu_part = atof(sval.c_str()); - } - } - inf.close(); - - parameters::dou_par.insert(map::value_type("gpu part", gpu_part)); - } - - if (nodes == 0) - nodes = cpusize / 2; -#else - if (nodes == 0) - nodes = cpusize; -#endif - - if (dim != 3) - { - cout << "distrivute: now we only support 3-dimension" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - MyList *BlL = 0; - - int split_size, min_size, block_size = 0; - - int min_width = 2 * Mymax(ghost_width, buffer_width); - int nxyz[dim], mmin_width[dim], min_shape[dim]; - - MyList *PLi = PatchLIST; - for (int i = 0; i < dim; i++) - min_shape[i] = PLi->data->shape[i]; - int lev = PLi->data->lev; - PLi = PLi->next; - while (PLi) - { - Patch *PP = PLi->data; - for (int i = 0; i < dim; i++) - min_shape[i] = Mymin(min_shape[i], PP->shape[i]); - if (lev != PLi->data->lev) - cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl; - PLi = PLi->next; - } - - for (int i = 0; i < dim; i++) - mmin_width[i] = Mymin(min_width, min_shape[i]); - - min_size = mmin_width[0]; - for (int i = 1; i < dim; i++) - min_size = min_size * mmin_width[i]; - - PLi = PatchLIST; - while (PLi) - { - Patch *PP = PLi->data; - // PP->checkPatch(true); - int bs = PP->shape[0]; - for (int i = 1; i < dim; i++) - bs = bs * PP->shape[i]; - block_size = block_size + bs; - PLi = PLi->next; - } - split_size = Mymax(min_size, block_size 
/ nodes); - split_size = Mymax(1, split_size); - - int n_rank = 0; - PLi = PatchLIST; - int reacpu = 0; - while (PLi) - { - Patch *PP = PLi->data; - - reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape); - - Block *ng0, *ng; - int shape_here[dim], ibbox_here[2 * dim]; - double bbox_here[2 * dim], dd; - - // ibbox : 0,...N-1 - for (int i = 0; i < nxyz[0]; i++) - for (int j = 0; j < nxyz[1]; j++) - for (int k = 0; k < nxyz[2]; k++) - { - ibbox_here[0] = (PP->shape[0] * i) / nxyz[0]; - ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1; - ibbox_here[1] = (PP->shape[1] * j) / nxyz[1]; - ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1; - ibbox_here[2] = (PP->shape[2] * k) / nxyz[2]; - ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1; - - if (periodic) - { - ibbox_here[0] = ibbox_here[0] - ghost_width; - ibbox_here[3] = ibbox_here[3] + ghost_width; - ibbox_here[1] = ibbox_here[1] - ghost_width; - ibbox_here[4] = ibbox_here[4] + ghost_width; - ibbox_here[2] = ibbox_here[2] - ghost_width; - ibbox_here[5] = ibbox_here[5] + ghost_width; - } - else - { - ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width); - ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width); - ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width); - ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width); - ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width); - ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width); - } - - shape_here[0] = ibbox_here[3] - ibbox_here[0] + 1; - shape_here[1] = ibbox_here[4] - ibbox_here[1] + 1; - shape_here[2] = ibbox_here[5] - ibbox_here[2] + 1; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - // 0--4, 5--10 - dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1); - bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd; - bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd; - - dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1); - bbox_here[1] = PP->bbox[1] + 
ibbox_here[1] * dd; - bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd; - - dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1); - bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd; - bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd; -#else -#ifdef Cell - // 0--5, 5--10 - dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0]; - bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd; - bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd; - - dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1]; - bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd; - bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd; - - dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2]; - bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd; - bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd; -#else -#error Not define Vertex nor Cell -#endif -#endif - -#ifdef USE_GPU_DIVIDE - { - const int pices = 2; - double picef[pices]; - picef[0] = cpu_part; - picef[1] = gpu_part; - int shape_res[dim * pices]; - double bbox_res[2 * dim * pices]; - misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width); - ng = ng0 = new Block(dim, shape_res, bbox_res, n_rank++, ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks - - // if(n_rank==cpusize) {n_rank=0; cerr<<"place one!!"<checkBlock(); - if (BlL) - BlL->insert(ng); - else - BlL = new MyList(ng); // delete through KillBlocks - - for (int i = 1; i < pices; i++) - { - ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, n_rank++, ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks - // if(n_rank==cpusize) {n_rank=0; cerr<<"place two!! 
"<checkBlock(); - BlL->insert(ng); - } - } -#else - ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks - // ng->checkBlock(); - if (BlL) - BlL->insert(ng); - else - BlL = new MyList(ng); // delete through KillBlocks -#endif - if (n_rank == cpusize) - n_rank = 0; - - // set PP->blb - if (i == 0 && j == 0 && k == 0) - { - MyList *Bp = BlL; - while (Bp->data != ng0) - Bp = Bp->next; // ng0 is the first of the pices list - PP->blb = Bp; - } - } - // set PP->ble - { - MyList *Bp = BlL; - while (Bp->data != ng) - Bp = Bp->next; // ng is the last of the pices list - PP->ble = Bp; - } - PLi = PLi->next; - } - if (reacpu < nodes * 2 / 3) - { - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == 0) - cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." << endl; - } - - return BlL; -} -#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) -MyList *Parallel::distribute(MyList *PatchLIST, int cpusize, int ingfsi, int fngfsi, - bool periodic, int start_rank, int end_rank, int nodes) -{ -#ifdef USE_GPU_DIVIDE - double cpu_part, gpu_part; - map::iterator iter; - iter = parameters::dou_par.find("cpu part"); - if (iter != parameters::dou_par.end()) - { - cpu_part = iter->second; - } - else - { - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - // read parameter from file - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - ifstream inf(pname, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "Can not open parameter file " << pname << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - 
- for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - cout << "error reading parameter file " << pname << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "ABE") - { - if (skey == "cpu part") - cpu_part = atof(sval.c_str()); - } - } - inf.close(); - - parameters::dou_par.insert(map::value_type("cpu part", cpu_part)); - } - iter = parameters::dou_par.find("gpu part"); - if (iter != parameters::dou_par.end()) - { - gpu_part = iter->second; - } - else - { - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - // read parameter from file - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - ifstream inf(pname, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "Can not open parameter file " << pname << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - cout << "error reading parameter file " << pname << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "ABE") - { - if (skey == "gpu part") - gpu_part = atof(sval.c_str()); - } - } - inf.close(); - - parameters::dou_par.insert(map::value_type("gpu part", gpu_part)); - } - - if (nodes == 0) - nodes = cpusize / 2; -#else - if (nodes == 0) - nodes = cpusize; -#endif - - if (dim != 3) - { - cout << "distrivute: now we only support 3-dimension" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - MyList *BlL = 0; - - int split_size, min_size, 
block_size = 0; - - int min_width = 2 * Mymax(ghost_width, buffer_width); - int nxyz[dim], mmin_width[dim], min_shape[dim]; - - MyList *PLi = PatchLIST; - for (int i = 0; i < dim; i++) - min_shape[i] = PLi->data->shape[i]; - int lev = PLi->data->lev; - PLi = PLi->next; - while (PLi) - { - Patch *PP = PLi->data; - for (int i = 0; i < dim; i++) - min_shape[i] = Mymin(min_shape[i], PP->shape[i]); - if (lev != PLi->data->lev) - cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl; - PLi = PLi->next; - } - - for (int i = 0; i < dim; i++) - mmin_width[i] = Mymin(min_width, min_shape[i]); - - min_size = mmin_width[0]; - for (int i = 1; i < dim; i++) - min_size = min_size * mmin_width[i]; - - PLi = PatchLIST; - while (PLi) - { - Patch *PP = PLi->data; - // PP->checkPatch(true); - int bs = PP->shape[0]; - for (int i = 1; i < dim; i++) - bs = bs * PP->shape[i]; - block_size = block_size + bs; - PLi = PLi->next; - } - split_size = Mymax(min_size, block_size / cpusize); - split_size = Mymax(1, split_size); - - int n_rank = start_rank; - PLi = PatchLIST; - int reacpu = 0; - while (PLi) - { - Patch *PP = PLi->data; - - reacpu += partition3(nxyz, split_size, mmin_width, cpusize, PP->shape); - - Block *ng, *ng0; - int shape_here[dim], ibbox_here[2 * dim]; - double bbox_here[2 * dim], dd; - - // ibbox : 0,...N-1 - for (int i = 0; i < nxyz[0]; i++) - for (int j = 0; j < nxyz[1]; j++) - for (int k = 0; k < nxyz[2]; k++) - { - ibbox_here[0] = (PP->shape[0] * i) / nxyz[0]; - ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1; - ibbox_here[1] = (PP->shape[1] * j) / nxyz[1]; - ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1; - ibbox_here[2] = (PP->shape[2] * k) / nxyz[2]; - ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1; - - if (periodic) - { - ibbox_here[0] = ibbox_here[0] - ghost_width; - ibbox_here[3] = ibbox_here[3] + ghost_width; - ibbox_here[1] = ibbox_here[1] - ghost_width; - ibbox_here[4] = 
ibbox_here[4] + ghost_width; - ibbox_here[2] = ibbox_here[2] - ghost_width; - ibbox_here[5] = ibbox_here[5] + ghost_width; - } - else - { - ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width); - ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width); - ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width); - ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width); - ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width); - ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width); - } - - shape_here[0] = ibbox_here[3] - ibbox_here[0] + 1; - shape_here[1] = ibbox_here[4] - ibbox_here[1] + 1; - shape_here[2] = ibbox_here[5] - ibbox_here[2] + 1; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - // 0--4, 5--10 - dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1); - bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd; - bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd; - - dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1); - bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd; - bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd; - - dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1); - bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd; - bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd; -#else -#ifdef Cell - // 0--5, 5--10 - dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0]; - bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd; - bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd; - - dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1]; - bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd; - bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd; - - dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2]; - bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd; - bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd; -#else -#error Not define Vertex nor Cell -#endif -#endif - -#ifdef USE_GPU_DIVIDE - { - const int pices = 2; - double picef[pices]; - picef[0] = cpu_part; - picef[1] = gpu_part; - int shape_res[dim * pices]; - 
double bbox_res[2 * dim * pices]; - misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width); - ng = ng0 = new Block(dim, shape_res, bbox_res, n_rank++, ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks - // ng->checkBlock(); - if (BlL) - BlL->insert(ng); - else - BlL = new MyList(ng); // delete through KillBlocks - - for (int i = 1; i < pices; i++) - { - ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, n_rank++, ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks - // ng->checkBlock(); - BlL->insert(ng); - } - } -#else - ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks - // ng->checkBlock(); - if (BlL) - BlL->insert(ng); - else - BlL = new MyList(ng); // delete through KillBlocks -#endif - - if (n_rank == end_rank + 1) - n_rank = start_rank; - - // set PP->blb - if (i == 0 && j == 0 && k == 0) - { - MyList *Bp = BlL; - while (Bp->data != ng0) - Bp = Bp->next; // ng0 is the first of the pices list - PP->blb = Bp; - } - } - // set PP->ble - { - MyList *Bp = BlL; - while (Bp->data != ng) - Bp = Bp->next; // ng is the last of the pices list - PP->ble = Bp; - } - PLi = PLi->next; - } - if (reacpu < nodes * 2 / 3) - { - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == start_rank) - cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." 
<< endl; - } - - return BlL; -} -#endif -void Parallel::setfunction(MyList *BlL, var *vn, double func(double x, double y, double z)) -{ - while (BlL) - { - if (BlL->data->X[0]) - { - int nn = BlL->data->shape[0] * BlL->data->shape[1] * BlL->data->shape[2]; - double *p = BlL->data->fgfs[vn->sgfn]; - for (int i = 0; i < nn; i++) - { - int ind[3]; - getarrayindex(3, BlL->data->shape, ind, i); - p[i] = func(BlL->data->X[0][ind[0]], BlL->data->X[1][ind[1]], BlL->data->X[2][ind[2]]); - } - } - BlL = BlL->next; - } -} -// set function only for cpu rank -void Parallel::setfunction(int rank, MyList *BlL, var *vn, double func(double x, double y, double z)) -{ - while (BlL) - { - if (BlL->data->X[0] && BlL->data->rank == rank) - { - int nn = BlL->data->shape[0] * BlL->data->shape[1] * BlL->data->shape[2]; - double *p = BlL->data->fgfs[vn->sgfn]; - for (int i = 0; i < nn; i++) - { - int ind[3]; - getarrayindex(3, BlL->data->shape, ind, i); - p[i] = func(BlL->data->X[0][ind[0]], BlL->data->X[1][ind[1]], BlL->data->X[2][ind[2]]); - } - } - BlL = BlL->next; - } -} -void Parallel::getarrayindex(int DIM, int *shape, int *index, int n) -{ - // we assume index has already memory space - int *mu; - mu = new int[DIM]; - mu[0] = 1; - for (int i = 1; i < DIM; i++) - mu[i] = mu[i - 1] * shape[i - 1]; - for (int i = DIM - 1; i >= 0; i--) - { - index[i] = n / mu[i]; - n = n - index[i] * mu[i]; - } - - delete[] mu; -} -int Parallel::getarraylocation(int DIM, int *shape, int *index) -{ - int n, mu; - mu = shape[0]; - n = index[0]; - for (int i = 1; i < DIM; i++) - { - n = n + index[i] * mu; - mu = mu * shape[i]; - } - - return n; -} -void Parallel::copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin, - int *shape, double *datain, double *llb, double *uub) -{ - // for 3 dimensional case, based on simple test, I found this is half slower than f90 code - int *illi, *iuui; - int *illo, *iuuo; - int *indi, *indo; - illi = new int[DIM]; - iuui = new 
int[DIM]; - illo = new int[DIM]; - iuuo = new int[DIM]; - indi = new int[DIM]; - indo = new int[DIM]; - - int ial = 1; - for (int i = 0; i < DIM; i++) - { - double ho, hi; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - ho = (uubout[i] - llbout[i]) / (Dshape[i] - 1); - hi = (uubin[i] - llbin[i]) / (shape[i] - 1); -#else -#ifdef Cell - ho = (uubout[i] - llbout[i]) / Dshape[i]; - hi = (uubin[i] - llbin[i]) / shape[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - illo[i] = int((llb[i] - llbout[i]) / ho); - iuuo[i] = Dshape[i] - 1 - int((uubout[i] - uub[i]) / ho); - illi[i] = int((llb[i] - llbin[i]) / hi); - iuui[i] = shape[i] - 1 - int((uubin[i] - uub[i]) / hi); - - if (illo[i] > iuuo[i] || illi[i] > iuui[i] || illo[i] < 0 || illi[i] < 0 || - iuui[i] >= shape[i] || iuuo[i] >= Dshape[i]) - { - cout << "Parallel copy: in direction " << i << ":" << endl; - cout << "llb = " << llb[i] << ", uub = " << uub[i] << endl; - cout << " in data : il = " << illi[i] << ", iu = " << iuui[i] << endl; - cout << "bbox = (" << llbin[i] << "," << uubin[i] << ")" << endl; - cout << "shape = " << shape[i] << endl; - cout << "out data : il = " << illo[i] << ", iu = " << iuuo[i] << endl; - cout << "bbox = (" << llbout[i] << "," << uubout[i] << ")" << endl; - cout << "shape = " << Dshape[i] << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - int ihi = iuui[i] - illi[i] + 1, iho = iuuo[i] - illo[i] + 1; - if (!(feq(ho, hi, ho / 2)) || ihi != iho) - { - cout << "Parallel copy: in direction " << i << ":" << endl; - cout << "Parallel copy: not the same grid structure." 
<< endl; - cout << "hi = " << hi << ", bbox = (" << llbin[i] << "," << uubin[i] << "), shape = " << shape[i] << endl; - cout << "ho = " << ho << ", bbox = (" << llbout[i] << "," << uubout[i] << "), shape = " << Dshape[i] << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - ial = ial * ihi; - } - - for (int i = 0; i < DIM; i++) - { - indi[i] = illi[i]; - indo[i] = illo[i]; - } - /* - //check start index - for(int i=0;i NNi) - { - cout << "Parallel copy: ni = " << ni << " is out of array range (0," << NNi << ")." << endl; - cout << "shape = ("; - for (int j = 0; j < DIM; j++) - { - cout << shape[j]; - if (j < DIM - 1) - cout << ","; - else - cout << ")" << endl; - } - cout << "ind = ("; - for (int j = 0; j < DIM; j++) - { - cout << indi[j]; - if (j < DIM - 1) - cout << ","; - else - cout << ")" << endl; - } - MPI_Abort(MPI_COMM_WORLD, 1); - } - DD[no] = datain[ni]; - - indi[0]++; - for (int j = 1; j < DIM; j++) - { - if (indi[j - 1] == iuui[j - 1] + 1) - { - indi[j - 1] = illi[j - 1]; - indi[j]++; - } // carry 1 to next digital - else - break; - } - indo[0]++; - for (int j = 1; j < DIM; j++) - { - if (indo[j - 1] == iuuo[j - 1] + 1) - { - indo[j - 1] = illo[j - 1]; - indo[j]++; - } - else - break; - } - } - /* - //check final index - for(int i=0;i *BlL, MyList *DumpList, char *tag, double time, double dT) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - // round at 4 and 5 - int ncount = int(time / dT + 0.5); - - MyList *Bp; - while (DumpList) - { - Bp = BlL; - int Bi = 0; - while (Bp) - { - Block *BP = Bp->data; - var *VP = DumpList->data; - if (BP->rank == myrank) - { - - string out_dir; - map::iterator iter; - iter = parameters::str_par.find("output dir"); - if (iter != parameters::str_par.end()) - { - out_dir = iter->second; - } - else - { - // read parameter from file - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != 
parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - ifstream inf(pname, ifstream::in); - if (!inf.good()) - { - cout << "Can not open parameter file " << pname << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - cout << "error reading parameter file " << pname << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "ABE") - { - if (skey == "output dir") - out_dir = sval; - } - } - inf.close(); - - parameters::str_par.insert(map::value_type("output dir", out_dir)); - } - - char filename[100]; - if (tag) - sprintf(filename, "%s/%s_Lev%02d-%02d_%02d_%s_%05d.bin", out_dir.c_str(), tag, BP->lev, Bi, myrank, VP->name, ncount); - else - sprintf(filename, "%s/Lev%02d-%02d_%02d_%s_%05d.bin", out_dir.c_str(), BP->lev, Bi, myrank, VP->name, ncount); - writefile(time, BP->shape[0], BP->shape[1], BP->shape[2], BP->bbox[0], BP->bbox[3], BP->bbox[1], BP->bbox[4], - BP->bbox[2], BP->bbox[5], filename, BP->fgfs[VP->sgfn]); - cout << "end of dump " << VP->name << " at time " << time << ", on node " << myrank << endl; - } - Bp = Bp->next; - Bi++; - } - DumpList = DumpList->next; - } -} -// Now we dump the data including buffer points -void Parallel::Dump_Data(Patch *PP, MyList *DumpList, char *tag, double time, double dT, int grd) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - // round at 4 and 5 - int ncount = int(time / dT + 0.5); - - MPI_Status sta; - int DIM = 3; - double llb[3], uub[3]; - double DX, DY, DZ; - - double *databuffer = 0; - if (myrank == 0) - { - databuffer = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1] * PP->shape[2]); - if (!databuffer) - { - cout << "Parallel::Dump_Data: out of memory when dumping data." 
<< endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - while (DumpList) - { - var *VP = DumpList->data; - - MyList *Bp = PP->blb; - while (Bp) - { - Block *BP = Bp->data; - if (BP->rank == 0 && myrank == 0) - { - DX = BP->getdX(0); - DY = BP->getdX(1); - DZ = BP->getdX(2); - llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; - llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; - llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; - uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; - uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; - uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; - f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub); - } - else - { - int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); - if (myrank == 0) - { - double *bufferhere = (double *)malloc(sizeof(double) * nnn); - if (!bufferhere) - { - cout << "on node#" << myrank << ", out of memory when dumping data." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta); - DX = BP->getdX(0); - DY = BP->getdX(1); - DZ = BP->getdX(2); - llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; - llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; - llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; - uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; - uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; - uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? 
BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; - f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub); - free(bufferhere); - } - else if (myrank == BP->rank) - { - MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); - } - } - if (Bp == PP->ble) - break; - Bp = Bp->next; - } - if (myrank == 0) - { - - string out_dir; - map::iterator iter; - iter = parameters::str_par.find("output dir"); - if (iter != parameters::str_par.end()) - { - out_dir = iter->second; - } - else - { - // read parameter from file - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - ifstream inf(pname, ifstream::in); - if (!inf.good()) - { - cout << "Can not open parameter file " << pname << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - cout << "error reading parameter file " << pname << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "ABE") - { - if (skey == "output dir") - out_dir = sval; - } - } - inf.close(); - - parameters::str_par.insert(map::value_type("output dir", out_dir)); - } - - char filename[100]; - if (tag) - sprintf(filename, "%s/%s_Lev%02d-%02d_%s_%05d.bin", out_dir.c_str(), tag, PP->lev, grd, VP->name, ncount); - else - sprintf(filename, "%s/Lev%02d-%02d_%s_%05d.bin", out_dir.c_str(), PP->lev, grd, VP->name, ncount); - - writefile(time, PP->shape[0], PP->shape[1], PP->shape[2], PP->bbox[0], PP->bbox[3], PP->bbox[1], PP->bbox[4], - PP->bbox[2], PP->bbox[5], filename, databuffer); - } - DumpList = 
DumpList->next; - } - - if (myrank == 0) - free(databuffer); -} -void Parallel::Dump_Data(MyList *PL, MyList *DumpList, char *tag, double time, double dT) -{ - MyList *Pp; - Pp = PL; - int grd = 0; - while (Pp) - { - Patch *PP = Pp->data; - Dump_Data(PP, DumpList, tag, time, dT, grd); - grd++; - Pp = Pp->next; - } -} -// collect the data including buffer points -double *Parallel::Collect_Data(Patch *PP, var *VP) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - MPI_Status sta; - int DIM = 3; - double llb[3], uub[3]; - double DX, DY, DZ; - - double *databuffer = 0; - if (myrank == 0) - { - databuffer = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1] * PP->shape[2]); - if (!databuffer) - { - cout << "Parallel::Collect_Data: out of memory when dumping data." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - MyList *Bp = PP->blb; - while (Bp) - { - Block *BP = Bp->data; - if (BP->rank == 0 && myrank == 0) - { - DX = BP->getdX(0); - DY = BP->getdX(1); - DZ = BP->getdX(2); - llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; - llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; - llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; - uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; - uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; - uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; - f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub); - } - else - { - int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); - if (myrank == 0) - { - double *bufferhere = (double *)malloc(sizeof(double) * nnn); - if (!bufferhere) - { - cout << "on node#" << myrank << ", out of memory when dumping data." 
<< endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta); - DX = BP->getdX(0); - DY = BP->getdX(1); - DZ = BP->getdX(2); - llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; - llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; - llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; - uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; - uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; - uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; - f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub); - free(bufferhere); - } - else if (myrank == BP->rank) - { - MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); - } - } - if (Bp == PP->ble) - break; - Bp = Bp->next; - } - - return databuffer; -} -// Now we dump the data including buffer points -// dump z = 0 plane -void Parallel::d2Dump_Data(Patch *PP, MyList *DumpList, char *tag, double time, double dT, int grd) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - // round at 4 and 5 - int ncount = int(time / dT + 0.5); - - MPI_Status sta; - int DIM = 3; - double llb[3], uub[3]; - double DX, DY, DZ; - - double *databuffer = 0, *databuffer2 = 0; - if (myrank == 0) - { - databuffer = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1] * PP->shape[2]); - databuffer2 = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1]); - if (!databuffer || !databuffer2) - { - cout << "Parallel::d2Dump_Data: out of memory when dumping data." 
<< endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - while (DumpList) - { - var *VP = DumpList->data; - - MyList *Bp = PP->blb; - while (Bp) - { - Block *BP = Bp->data; - if (BP->rank == 0 && myrank == 0) - { - DX = BP->getdX(0); - DY = BP->getdX(1); - DZ = BP->getdX(2); - llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; - llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; - llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; - uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; - uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; - uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; - f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub); - } - else - { - int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); - if (myrank == 0) - { - double *bufferhere = (double *)malloc(sizeof(double) * nnn); - if (!bufferhere) - { - cout << "on node#" << myrank << ", out of memory when dumping data." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta); - DX = BP->getdX(0); - DY = BP->getdX(1); - DZ = BP->getdX(2); - llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; - llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; - llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; - uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; - uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; - uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? 
BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; - f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub); - free(bufferhere); - } - else if (myrank == BP->rank) - { - MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); - } - } - if (Bp == PP->ble) - break; - Bp = Bp->next; - } - if (myrank == 0) - { - - string out_dir; - map::iterator iter; - iter = parameters::str_par.find("output dir"); - if (iter != parameters::str_par.end()) - { - out_dir = iter->second; - } - else - { - // read parameter from file - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - ifstream inf(pname, ifstream::in); - if (!inf.good()) - { - cout << "Can not open parameter file " << pname << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - cout << "error reading parameter file " << pname << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "ABE") - { - if (skey == "output dir") - out_dir = sval; - } - } - inf.close(); - - parameters::str_par.insert(map::value_type("output dir", out_dir)); - } - - char filename[100]; - if (tag) - sprintf(filename, "%s/%s_2d_Lev%02d-%02d_%s_%05d.dat", out_dir.c_str(), tag, PP->lev, grd, VP->name, ncount); - else - sprintf(filename, "%s/2d_Lev%02d-%02d_%s_%05d.dat", out_dir.c_str(), PP->lev, grd, VP->name, ncount); - - int gord = ghost_width; - f_d2dump(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, databuffer2, gord, VP->SoA); - writefile(time, PP->shape[0], PP->shape[1], 
PP->bbox[0], PP->bbox[3], PP->bbox[1], PP->bbox[4], - filename, databuffer2); - } - DumpList = DumpList->next; - } - - if (myrank == 0) - { - free(databuffer); - free(databuffer2); - } -} -void Parallel::d2Dump_Data(MyList *PL, MyList *DumpList, char *tag, double time, double dT) -{ - MyList *Pp; - Pp = PL; - int grd = 0; - while (Pp) - { - Patch *PP = Pp->data; - d2Dump_Data(PP, DumpList, tag, time, dT, grd); - grd++; - Pp = Pp->next; - } -} -// Now we dump the data including buffer points and ghost points of the given patch -void Parallel::Dump_Data0(Patch *PP, MyList *DumpList, char *tag, double time, double dT) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - // round at 4 and 5 - int ncount = int(time / dT + 0.5); - - MPI_Status sta; - int DIM = 3; - double llb[3], uub[3], tllb[3], tuub[3]; - int tshape[3]; - double DX, DY, DZ; - - for (int i = 0; i < 3; i++) - { - double DX = PP->blb->data->getdX(i); - tshape[i] = PP->shape[i] + 2 * ghost_width; - tllb[i] = PP->bbox[i] - ghost_width * DX; - tuub[i] = PP->bbox[i + dim] + ghost_width * DX; - } - - int NN = tshape[0] * tshape[1] * tshape[2]; - double *databuffer = 0; - if (myrank == 0) - { - databuffer = (double *)malloc(sizeof(double) * NN); - if (!databuffer) - { - cout << "on node# " << myrank << ", out of memory when dumping data." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - while (DumpList) - { - var *VP = DumpList->data; - MyList *Bp = PP->blb; - while (Bp) - { - Block *BP = Bp->data; - if (BP->rank == 0 && myrank == 0) - { - DX = BP->getdX(0); - DY = BP->getdX(1); - DZ = BP->getdX(2); - llb[0] = (feq(BP->bbox[0], tllb[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; - llb[1] = (feq(BP->bbox[1], tllb[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; - llb[2] = (feq(BP->bbox[2], tllb[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; - uub[0] = (feq(BP->bbox[3], tuub[0], DX / 2)) ? 
BP->bbox[3] : BP->bbox[3] - ghost_width * DX; - uub[1] = (feq(BP->bbox[4], tuub[1], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; - uub[2] = (feq(BP->bbox[5], tuub[2], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; - f_copy(DIM, tllb, tuub, tshape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub); - } - else - { - if (myrank == 0) - { - int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); - double *bufferhere = (double *)malloc(sizeof(double) * nnn); - if (!bufferhere) - { - cout << "on node#" << myrank << ", out of memory when dumping data." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta); - DX = BP->getdX(0); - DY = BP->getdX(1); - DZ = BP->getdX(2); - llb[0] = (feq(BP->bbox[0], tllb[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; - llb[1] = (feq(BP->bbox[1], tllb[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; - llb[2] = (feq(BP->bbox[2], tllb[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; - uub[0] = (feq(BP->bbox[3], tuub[0], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; - uub[1] = (feq(BP->bbox[4], tuub[1], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; - uub[2] = (feq(BP->bbox[5], tuub[2], DZ / 2)) ? 
BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; - f_copy(DIM, tllb, tuub, tshape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub); - free(bufferhere); - } - else if (myrank == BP->rank) - { - int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); - MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); - } - } - if (Bp == PP->ble) - break; - Bp = Bp->next; - } - if (myrank == 0) - { - - string out_dir; - map::iterator iter; - iter = parameters::str_par.find("output dir"); - if (iter != parameters::str_par.end()) - { - out_dir = iter->second; - } - else - { - // read parameter from file - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - ifstream inf(pname, ifstream::in); - if (!inf.good()) - { - cout << "Can not open parameter file " << pname << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - cout << "error reading parameter file " << pname << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "ABE") - { - if (skey == "output dir") - out_dir = sval; - } - } - inf.close(); - - parameters::str_par.insert(map::value_type("output dir", out_dir)); - } - - char filename[100]; - if (tag) - sprintf(filename, "%s/%s_Lev%02d_%s_%05d.bin", out_dir.c_str(), tag, PP->lev, VP->name, ncount); - else - sprintf(filename, "%s/Lev%02d_%s_%05d.bin", out_dir.c_str(), PP->lev, VP->name, ncount); - - writefile(time, tshape[0], tshape[1], tshape[2], tllb[0], tuub[0], tllb[1], tuub[2], - tllb[2], tuub[2], filename, databuffer); - } - DumpList = 
DumpList->next; - } - - if (myrank == 0) - free(databuffer); -} -// Map point is much easier than maping data itself -// But the main problem is about the points near the boundary -// worst case is -ghost -ghost+1 .... 0 * ...... -double Parallel::global_interp(int DIM, int *ext, double **CoX, double *datain, - double *poXb, int ordn, double *SoA, int Symmetry) -{ - if (DIM != 3) - { - cout << "Parallel::global_interp does not suport DIM = " << DIM << " for Symmetry." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - double resu; - double poX[3]; - double asgn = 1; - - for (int i = 0; i < 3; i++) - poX[i] = poXb[i]; - - switch (Symmetry) - { - case 2: - for (int i = 0; i < 3; i++) - if (poX[i] < 0) - { - poX[i] = -poX[i]; - asgn = asgn * SoA[i]; - } - break; - case 1: - if (poX[2] < 0) - { - poX[2] = -poX[2]; - asgn = asgn * SoA[2]; - } - } - - int extb[3]; - - for (int i = 0; i < 3; i++) - extb[i] = ext[i]; - - switch (Symmetry) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - case 2: - if (poX[0] < (ghost_width - 1) * (CoX[0][1] - CoX[0][0])) - extb[0] = extb[0] + ghost_width - 1; - if (poX[1] < (ghost_width - 1) * (CoX[1][1] - CoX[1][0])) - extb[1] = extb[1] + ghost_width - 1; - case 1: - if (poX[2] < (ghost_width - 1) * (CoX[2][1] - CoX[2][0])) - extb[2] = extb[2] + ghost_width - 1; -#else -#ifdef Cell - case 2: - if (poX[0] < (ghost_width - 0.5) * (CoX[0][1] - CoX[0][0])) - extb[0] = extb[0] + ghost_width; - if (poX[1] < (ghost_width - 0.5) * (CoX[1][1] - CoX[1][0])) - extb[1] = extb[1] + ghost_width; - case 1: - if (poX[2] < (ghost_width - 0.5) * (CoX[2][1] - CoX[2][0])) - extb[2] = extb[2] + ghost_width; -#else -#error Not define Vertex nor Cell -#endif -#endif - } - - if (extb[0] > ext[0] || extb[1] > ext[1] || extb[2] > ext[2]) - { - double *CoXb[3]; - int Nb = extb[0] * extb[1] * extb[2]; - double *datab; - datab = new double[Nb]; - for (int i = 0; i < 3; i++) - { - CoXb[i] = new double[extb[i]]; - double DH = CoX[i][1] - 
CoX[i][0]; - if (extb[i] > ext[i]) - { - if (CoX[i][0] > DH) - { - cout << "lower boundary[" << i << "] = " << CoX[i][0] << ", but SYmmetry = " << Symmetry << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - for (int j = 0; j < ghost_width - 1; j++) - CoXb[i][j] = -CoX[i][ghost_width - 1 - j]; - for (int j = ghost_width - 1; j < extb[i]; j++) - CoXb[i][j] = CoX[i][j - ghost_width + 1]; -#else -#ifdef Cell - for (int j = 0; j < ghost_width; j++) - CoXb[i][j] = -CoX[i][ghost_width - 1 - j]; - for (int j = ghost_width; j < extb[i]; j++) - CoXb[i][j] = CoX[i][j - ghost_width]; -#else -#error Not define Vertex nor Cell -#endif -#endif - } - else - { - for (int j = 0; j < extb[i]; j++) - CoXb[i][j] = CoX[i][j]; - } - } - - for (int i = 0; i < Nb; i++) - { - int ind[3], indb[3]; - getarrayindex(3, extb, indb, i); - double sgn = 1; - for (int j = 0; j < 3; j++) - { - if (extb[j] > ext[j]) - { -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - if (indb[j] < ghost_width - 1) - { - ind[j] = ghost_width - 1 - indb[j]; - sgn = sgn * SoA[j]; - } - else - { - ind[j] = 1 + indb[j] - ghost_width; - } -#else -#ifdef Cell - if (indb[j] < ghost_width) - { - ind[j] = ghost_width - 1 - indb[j]; - sgn = sgn * SoA[j]; - } - else - { - ind[j] = indb[j] - ghost_width; - } -#else -#error Not define Vertex nor Cell -#endif -#endif - } - else - ind[j] = indb[j]; - } - int lon = getarraylocation(3, ext, ind); - datab[i] = datain[lon] * sgn; - } - - resu = global_interp(DIM, extb, CoXb, datab, poX, ordn); - - for (int i = 0; i < 3; i++) - delete[] CoXb[i]; - delete[] datab; - } - else - { - resu = global_interp(DIM, ext, CoX, datain, poX, ordn); - } - - return resu * asgn; -} -double Parallel::global_interp(int DIM, int *ext, double **CoX, double *datain, - double *poX, int ordn) -{ - if (ordn > 2 * ghost_width) - { - cout << "Parallel::global_interp can not handle ordn = " << ordn << " > 
2*ghost_width = " << 2 * ghost_width << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - double *bbox, *datainbbox; - bbox = new double[2 * DIM]; - datainbbox = new double[2 * DIM]; - - int *NN, *ind, *shape; - NN = new int[DIM]; - ind = new int[DIM]; - shape = new int[DIM]; - - for (int i = 0; i < DIM; i++) - { - ind[i] = int((poX[i] - CoX[i][0]) / (CoX[i][1] - CoX[i][0])) - ordn / 2 + 1; - // poX may exactly locate on the boundary (exclude ghost) - if (ind[i] == -1 && feq(poX[i], CoX[i][0], (CoX[i][1] - CoX[i][0]) / 2)) - ind[i] = 0; - /* - if(ind[i] < 0) - { - cout<<"Parallel::global_interp error ind["< ext = "<= 0; i--) - NN[i] = NN[i + 1] * ordn; - - double *xpts, *funcvals; - xpts = new double[ordn]; - funcvals = new double[ordn]; - double *DDd, *DDd1, rr; - - DDd = new double[NN[0]]; - - copy(DIM, bbox, bbox + DIM, shape, DDd, datainbbox, datainbbox + DIM, ext, datain, bbox, bbox + DIM); - - for (int i = 0; i < DIM; i++) - { - for (int j = ind[i]; j < ind[i] + ordn; j++) - { - xpts[j - ind[i]] = CoX[i][j]; - } - - if (i < DIM - 1) - { - DDd1 = new double[NN[i + 1]]; - for (int j = 0; j < NN[i + 1]; j++) - { - for (int k = 0; k < ordn; k++) - funcvals[k] = DDd[k + j * ordn]; - DDd1[j] = Lagrangian_Int(poX[i], ordn, xpts, funcvals); - } - delete[] DDd; - DDd = DDd1; - } - else - { - for (int j = 0; j < ordn; j++) - funcvals[j] = DDd[j]; - rr = Lagrangian_Int(poX[i], ordn, xpts, funcvals); - delete[] DDd1; // since DDd and DDd1 now point to the same stuff, we need delete after above int - } - } - - delete[] NN; - delete[] ind; - delete[] xpts; - delete[] funcvals; - delete[] bbox; - delete[] datainbbox; - delete[] shape; - - return rr; -} -double Parallel::Lagrangian_Int(double x, int npts, double *xpts, double *funcvals) -{ - double sum = 0; - for (int i = 0; i < npts; i++) - { - sum = sum + funcvals[i] * LagrangePoly(x, i, npts, xpts); - } - return sum; -} -double Parallel::LagrangePoly(double x, int pt, int npts, double *xpts) -{ - double h = 1; - int i; - - 
for (i = 0; i < pt; i++) - h = h * (x - xpts[i]) / (xpts[pt] - xpts[i]); - - for (i = pt + 1; i < npts; i++) - h = h * (x - xpts[i]) / (xpts[pt] - xpts[i]); - - return h; -} -// collect all grid segments or blocks including ghost and buffer for given patch -MyList *Parallel::build_complete_gsl(Patch *Pat) -{ - MyList *cgsl = 0, *gs; - MyList *BP = Pat->blb; - while (BP) - { - if (!cgsl) - { - cgsl = gs = new MyList; // delete through destroyList(); - gs->data = new Parallel::gridseg; - } - else - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - gs->data->llb[i] = BP->data->bbox[i]; - gs->data->uub[i] = BP->data->bbox[dim + i]; - gs->data->shape[i] = BP->data->shape[i]; - } - gs->data->Bg = BP->data; - gs->next = 0; - - if (BP == Pat->ble) - break; - BP = BP->next; - } - - return cgsl; -} -// collect all grid segments or blocks including ghost and buffer for given patch list -MyList *Parallel::build_complete_gsl(MyList *PatL) -{ - MyList *cgsl = 0, *gs; - while (PatL) - { - if (!cgsl) - { - cgsl = build_complete_gsl(PatL->data); - gs = cgsl; - while (gs->next) - gs = gs->next; - } - else - { - gs->next = build_complete_gsl(PatL->data); - gs = gs->next; - while (gs->next) - gs = gs->next; - } - PatL = PatL->next; - } - - return cgsl; -} -// cellect the information of Patch list -MyList *Parallel::build_complete_gsl_virtual(MyList *PatL) -{ - MyList *cgsl = 0, *gs; - while (PatL) - { - if (cgsl) - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - else - { - cgsl = gs = new MyList; - gs->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - gs->data->llb[i] = PatL->data->bbox[i]; - gs->data->uub[i] = PatL->data->bbox[dim + i]; - gs->data->shape[i] = PatL->data->shape[i]; - } - gs->data->Bg = 0; - gs->next = 0; - - PatL = PatL->next; - } - - return cgsl; -} -// cellect the information of Patch list without buffer points -MyList 
*Parallel::build_complete_gsl_virtual2(MyList *PatL) // - buffer -{ - MyList *cgsl = 0, *gs; - while (PatL) - { - if (cgsl) - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - else - { - cgsl = gs = new MyList; - gs->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - double DH = PatL->data->getdX(i); - gs->data->llb[i] = PatL->data->bbox[i] + PatL->data->lli[i] * DH; - gs->data->uub[i] = PatL->data->bbox[dim + i] - PatL->data->uui[i] * DH; - gs->data->shape[i] = PatL->data->shape[i] - PatL->data->lli[i] - PatL->data->uui[i]; - } - gs->data->Bg = 0; - gs->next = 0; - - PatL = PatL->next; - } - - return cgsl; -} -// collect all grid segments or blocks without ghost for given patch, without extension -MyList *Parallel::build_bulk_gsl(Patch *Pat) -{ - MyList *cgsl = 0, *gs; - MyList *BP = Pat->blb; - while (BP) - { - Block *bp = BP->data; - if (!cgsl) - { - cgsl = gs = new MyList; - gs->data = new Parallel::gridseg; - } - else - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - double DH = bp->getdX(i); - gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH; - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? 
bp->bbox[i] : bp->bbox[i] + ghost_width * DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - gs->data->Bg = BP->data; - gs->next = 0; - - if (BP == Pat->ble) - break; - BP = BP->next; - } - - return cgsl; -} -// bulk part for given Block within given patch, without extension -MyList *Parallel::build_bulk_gsl(Block *bp, Patch *Pat) -{ - MyList *gs = 0; - - gs = new MyList; - gs->data = new Parallel::gridseg; - - for (int i = 0; i < dim; i++) - { - double DH = bp->getdX(i); - gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH; - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - gs->data->Bg = bp; - gs->next = 0; - - return gs; -} -MyList *Parallel::clone_gsl(MyList *p, bool first_only) -{ - MyList *np = 0, *q = 0, *pq = 0; - - while (p) - { - q = new MyList; - q->data = new Parallel::gridseg; - q->data->Bg = p->data->Bg; - for (int i = 0; i < dim; i++) - { - q->data->llb[i] = p->data->llb[i]; - q->data->uub[i] = p->data->uub[i]; - q->data->shape[i] = p->data->shape[i]; - } - if (pq) - pq->next = q; - else - np = q; - if (first_only) - { - np->next = 0; - return np; - } - pq = q; - p = p->next; - } - return np; -} -MyList *Parallel::gs_subtract(MyList *A, MyList *B) -{ - if (!A) - return 0; - if (!B) - return clone_gsl(A, true); - - double 
cut_plane[2 * dim], DH[dim]; - - for (int i = 0; i < dim; i++) - { - DH[i] = A->data->Bg->getdX(i); - if (B->data->Bg && !feq(DH[i], B->data->Bg->getdX(i), DH[i] / 2)) - { - cout << "Parallel::gs_subtract meets different grid segment " << DH[i] << " vs " << B->data->Bg->getdX(i) << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - MyList *C = 0, *q; - for (int i = 0; i < dim; i++) - { - if (B->data->llb[i] > A->data->uub[i] || B->data->uub[i] < A->data->llb[i]) - return clone_gsl(A, true); - cut_plane[i] = A->data->llb[i]; - cut_plane[i + dim] = A->data->uub[i]; - } - - for (int i = 0; i < dim; i++) - { - cut_plane[i] = Mymax(A->data->llb[i], B->data->llb[i]); - if (cut_plane[i] - A->data->llb[i] > DH[i] / 2) - { - q = clone_gsl(A, true); - // prolong the list from head - if (C) - q->next = C; - C = q; - for (int j = 0; j < dim; j++) - { - if (i == j) - { - C->data->llb[i] = A->data->llb[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - C->data->uub[i] = Mymax(C->data->llb[i], cut_plane[i] - DH[i]); -#else -#ifdef Cell - C->data->uub[i] = Mymax(C->data->llb[i], cut_plane[i]); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - else - { - C->data->llb[j] = cut_plane[j]; - C->data->uub[j] = cut_plane[j + dim]; - } -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1; -#else -#ifdef Cell - C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - } - - cut_plane[i + dim] = Mymin(A->data->uub[i], B->data->uub[i]); - if (A->data->uub[i] - cut_plane[i + dim] > DH[i] / 2) - { - q = clone_gsl(A, true); - if (C) - q->next = C; - C = q; - for (int j = 0; j < dim; j++) - { - if (i == j) - { - C->data->uub[i] = A->data->uub[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - C->data->llb[i] = Mymin(C->data->uub[i], 
cut_plane[i + dim] + DH[i]); -#else -#ifdef Cell - C->data->llb[i] = Mymin(C->data->uub[i], cut_plane[i + dim]); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - else - { - C->data->llb[j] = cut_plane[j]; - C->data->uub[j] = cut_plane[j + dim]; - } -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1; -#else -#ifdef Cell - C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - } - } - return C; -} -// stupid method -/* -MyList *Parallel::gsl_subtract(MyList *A,MyList *B) //A subtract B but with A's information -{ -// always make return and A, B distinct - if(!A) return 0; - - if(!B) return clone_gsl(A,0); - - MyList *C=0,*C0,*C1,*Cc,*CC0,*gs; - - while(A) - { - C0=gs_subtract(A,B); // note C0 becomes a list after subtraction - C1=B->next; - while(C1) - { - CC0=C0; - Cc=0; - while(CC0) - { - gs=gs_subtract(CC0,C1); - if(Cc) Cc->catList(gs); - else Cc=gs; - CC0=CC0->next; - } - if(C0) C0->destroyList(); - C0=Cc; - C1=C1->next; - } - if(C) C->catList(C0); - else C=C0; - A=A->next; - } - - return C; -} -*/ -// more clever method -MyList *Parallel::gsl_subtract(MyList *A, MyList *B) // A subtract B but with A's information -{ - // always make return and A, B distinct - if (!A) - return 0; - - MyList *C = 0, *C0, *C1; - - C = clone_gsl(A, 0); - - while (B) - { - C0 = 0; - C1 = C; - while (C1) - { - if (C0) - C0->catList(gs_subtract(C1, B)); - else - C0 = gs_subtract(C1, B); - C1 = C1->next; - } - if (C) - C->destroyList(); - else - { - if (C0) - C0->destroyList(); - return 0; - } - - C = C0; - B = B->next; - } - - return C; -} -MyList *Parallel::gs_and(MyList *A, MyList *B) -{ - if (!A || !B) - return 0; - - double llb[dim], uub[dim]; - bool flag = false; - for (int i = 0; i < dim; i++) - { - llb[i] = Mymax(A->data->llb[i], B->data->llb[i]); - uub[i] = 
Mymin(A->data->uub[i], B->data->uub[i]); - if (llb[i] > uub[i]) - { - flag = true; - break; - } - } - if (flag) - return 0; - - MyList *C; - C = clone_gsl(A, true); - for (int i = 0; i < dim; i++) - { - C->data->llb[i] = llb[i]; - C->data->uub[i] = uub[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / C->data->Bg->getdX(i) + 0.4) + 1; -#else -#ifdef Cell - C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / C->data->Bg->getdX(i) + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - - return C; -} -// overlap of A_i and (union of all j of B_j) -MyList *Parallel::gsl_and(MyList *A, MyList *B) // A and B but with A's information -{ - MyList *C = 0, *C1; - - while (A) - { - C1 = B; - while (C1) - { - if (C) - C->catList(gs_and(A, C1)); - else - C = gs_and(A, C1); - C1 = C1->next; - } - A = A->next; - } - return C; -} -// collect all ghost grid segments or blocks for given patch -MyList *Parallel::build_ghost_gsl(Patch *Pat) -{ - MyList *cgsl = 0, *gs, *gsb; - MyList *BP = Pat->blb; - while (BP) - { - gs = new MyList; - gs->data = new Parallel::gridseg; - - for (int i = 0; i < dim; i++) - { - gs->data->llb[i] = BP->data->bbox[i]; - gs->data->uub[i] = BP->data->bbox[dim + i]; - gs->data->shape[i] = BP->data->shape[i]; - } - gs->data->Bg = BP->data; - gs->next = 0; - - gsb = build_bulk_gsl(BP->data, Pat); - - if (!cgsl) - cgsl = gs_subtract(gs, gsb); - else - cgsl->catList(gs_subtract(gs, gsb)); - - gsb->destroyList(); - gs->destroyList(); - - if (BP == Pat->ble) - break; - BP = BP->next; - } - - return cgsl; -} -// collect all ghost grid segments or blocks for given patch list -MyList *Parallel::build_ghost_gsl(MyList *PatL) -{ - MyList *cgsl = 0, *gs; - while (PatL) - { - if (!cgsl) - { - cgsl = build_ghost_gsl(PatL->data); - gs = cgsl; - while (gs->next) - gs = gs->next; - } - else - { - gs->next = build_ghost_gsl(PatL->data); - gs = gs->next; - 
while (gs->next) - gs = gs->next; - } - PatL = PatL->next; - } - - return cgsl; -} -// collect all grid segments or blocks without ghost for given patch -// special for Sync usage, so we do not need consider missing points -MyList *Parallel::build_owned_gsl0(Patch *Pat, int rank_in) -{ - MyList *cgsl = 0, *gs; - MyList *BP = Pat->blb; - while (BP) - { - Block *bp = BP->data; - if (bp->rank == rank_in) - { - if (!cgsl) - { - cgsl = gs = new MyList; - gs->data = new Parallel::gridseg; - } - else - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - double DH = bp->getdX(i); - gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH; - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - gs->data->Bg = BP->data; - gs->next = 0; - } - - if (BP == Pat->ble) - break; - BP = BP->next; - } - - return cgsl; -} -// collect all grid segments or blocks without ghost for given patch -MyList *Parallel::build_owned_gsl1(Patch *Pat, int rank_in) -{ - MyList *cgsl = 0, *gs; - MyList *BP = Pat->blb; - while (BP) - { - Block *bp = BP->data; - if (bp->rank == rank_in) - { - if (!cgsl) - { - cgsl = gs = new MyList; - gs->data = new Parallel::gridseg; - } - else - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - double DH = bp->getdX(i); - gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? 
bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - // NOTE: our dividing structure is (exclude ghost) - // -1 0 - // 1 2 - // so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to - // the fortran routine where we always take floor to get index - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + (ghost_width - 1) * DH; - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH; - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - gs->data->Bg = BP->data; - gs->next = 0; - } - - if (BP == Pat->ble) - break; - BP = BP->next; - } - - return cgsl; -} -// collect all grid segments or blocks without ghost nor buffer for given patch -MyList *Parallel::build_owned_gsl2(Patch *Pat, int rank_in) -{ - MyList *cgsl = 0, *gs; - MyList *BP = Pat->blb; - while (BP) - { - Block *bp = BP->data; - if (bp->rank == rank_in) - { - if (!cgsl) - { - cgsl = gs = new MyList; - gs->data = new Parallel::gridseg; - } - else - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - double DH = bp->getdX(i); - gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? 
bp->bbox[dim + i] - Pat->uui[i] * DH : bp->bbox[dim + i] - ghost_width * DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - // NOTE: our dividing structure is (exclude ghost) - // -1 0 - // 1 2 - // so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to - // the fortran routine where we always take floor to get index - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i] + (ghost_width - 1) * DH; - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i] + ghost_width * DH; - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - gs->data->Bg = BP->data; - gs->next = 0; - } - - if (BP == Pat->ble) - break; - BP = BP->next; - } - - return cgsl; -} -// collect all grid segments or blocks without ghost for given patch, and delete the ghost_width for interpolation consideration on the patch boundary -MyList *Parallel::build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry) -{ - MyList *cgsl = 0, *gs; - MyList *BP = Pat->blb; - while (BP) - { - Block *bp = BP->data; - if (bp->rank == rank_in) - { - if (!cgsl) - { - cgsl = gs = new MyList; - gs->data = new Parallel::gridseg; - } - else - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - double DH = bp->getdX(i); - gs->data->uub[i] = bp->bbox[dim + i] - ghost_width * DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - // NOTE: our dividing structure is (exclude ghost) - // -1 0 - // 1 2 - // so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to - // the fortran routine where we always 
take floor to get index - gs->data->llb[i] = bp->bbox[i] + (ghost_width - 1) * DH; - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->llb[i] = bp->bbox[i] + ghost_width * DH; - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - // Symmetry consideration - if (Symmetry > 0) - { - double DH = bp->getdX(2); - if (feq(bp->bbox[2], 0, DH / 2)) - { - gs->data->llb[2] = bp->bbox[2]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - if (Symmetry > 1) - { - for (int i = 0; i < 2; i++) - { - DH = bp->getdX(i); - if (feq(bp->bbox[i], 0, DH / 2)) - { - gs->data->llb[i] = bp->bbox[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - } - } - } - - gs->data->Bg = BP->data; - gs->next = 0; - } - - if (BP == Pat->ble) - break; - BP = BP->next; - } - - return cgsl; -} -// collect all grid segments or blocks without ghost nor buffer for given patch, -// and delete the ghost_width for interpolation consideration on the patch boundary -MyList *Parallel::build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry) -{ - MyList *cgsl = 0, *gs; - MyList *BP = Pat->blb; - while (BP) - { - Block *bp = BP->data; - if (bp->rank == rank_in) - { - if (!cgsl) - { - cgsl = gs = new MyList; - gs->data = new Parallel::gridseg; - } - else - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } 
- - for (int i = 0; i < dim; i++) - { - double DH = bp->getdX(i); - gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] - Pat->uui[i] * DH : bp->bbox[dim + i]; - gs->data->uub[i] -= ghost_width * DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - // NOTE: our dividing structure is (exclude ghost) - // -1 0 - // 1 2 - // so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to - // the fortran routine where we always take floor to get index - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i]; - gs->data->llb[i] += (ghost_width - 1) * DH; - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i]; - gs->data->llb[i] += ghost_width * DH; - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - // Symmetry consideration - if (Symmetry > 0) - { - double DH = bp->getdX(2); - if (feq(bp->bbox[2], 0, DH / 2)) - { - gs->data->llb[2] = bp->bbox[2]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - if (Symmetry > 1) - { - for (int i = 0; i < 2; i++) - { - DH = bp->getdX(i); - if (feq(bp->bbox[i], 0, DH / 2)) - { - gs->data->llb[i] = bp->bbox[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 
0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - } - } - } - - gs->data->Bg = BP->data; - gs->next = 0; - } - - if (BP == Pat->ble) - break; - BP = BP->next; - } - - return cgsl; -} -// collect all grid segments or blocks without ghost nor buffer for given patch, no extention -MyList *Parallel::build_owned_gsl5(Patch *Pat, int rank_in) -{ - MyList *cgsl = 0, *gs; - MyList *BP = Pat->blb; - while (BP) - { - Block *bp = BP->data; - if (bp->rank == rank_in) - { - if (!cgsl) - { - cgsl = gs = new MyList; - gs->data = new Parallel::gridseg; - } - else - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - double DH = bp->getdX(i); - gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] - Pat->uui[i] * DH : bp->bbox[dim + i] - ghost_width * DH; - gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i] + ghost_width * DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - gs->data->Bg = BP->data; - gs->next = 0; - } - - if (BP == Pat->ble) - break; - BP = BP->next; - } - - return cgsl; -} -// collect all grid segments or blocks without ghost for given patch list -// stupid method -/* -MyList *Parallel::build_owned_gsl(MyList *PatL,int rank_in,int type,int Symmetry) -{ - MyList *cgsl=0,*gs; - while(PatL) - { - if(!cgsl) - { - switch(type) - { - case 0: - cgsl = build_owned_gsl0(PatL->data,rank_in); - break; - case 1: - cgsl = build_owned_gsl1(PatL->data,rank_in); - break; - case 2: - cgsl = build_owned_gsl2(PatL->data,rank_in); - break; - case 3: - cgsl = build_owned_gsl3(PatL->data,rank_in,Symmetry); - break; - case 4: - cgsl = 
build_owned_gsl4(PatL->data,rank_in,Symmetry); - break; - case 5: - cgsl = build_owned_gsl5(PatL->data,rank_in); - break; - default: - cout<<"Parallel::build_owned_gsl : unknown type = "<next) gs = gs->next; - } - else - { - switch(type) - { - case 0: - gs->next = build_owned_gsl0(PatL->data,rank_in); - break; - case 1: - gs->next = build_owned_gsl1(PatL->data,rank_in); - break; - case 2: - gs->next = build_owned_gsl2(PatL->data,rank_in); - break; - case 3: - gs->next = build_owned_gsl3(PatL->data,rank_in,Symmetry); - break; - case 4: - gs->next = build_owned_gsl4(PatL->data,rank_in,Symmetry); - break; - case 5: - gs->next = build_owned_gsl5(PatL->data,rank_in); - break; - default: - cout<<"Parallel::build_owned_gsl : unknown type = "<next) gs = gs->next; - } - PatL = PatL->next; - } - - return cgsl; -} -*/ -// more clever method -MyList *Parallel::build_owned_gsl(MyList *PatL, int rank_in, int type, int Symmetry) -{ - MyList *cgsl = 0, *gs; - while (PatL) - { - switch (type) - { - case 0: - gs = build_owned_gsl0(PatL->data, rank_in); - break; - case 1: - gs = build_owned_gsl1(PatL->data, rank_in); - break; - case 2: - gs = build_owned_gsl2(PatL->data, rank_in); - break; - case 3: - gs = build_owned_gsl3(PatL->data, rank_in, Symmetry); - break; - case 4: - gs = build_owned_gsl4(PatL->data, rank_in, Symmetry); - break; - case 5: - gs = build_owned_gsl5(PatL->data, rank_in); - break; - default: - cout << "Parallel::build_owned_gsl : unknown type = " << type << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - if (cgsl) - cgsl->catList(gs); - else - cgsl = gs; - PatL = PatL->next; - } - - return cgsl; -} -// according to overlape to determine real grid segments -void Parallel::build_gstl(MyList *srci, MyList *dsti, - MyList **out_src, MyList **out_dst) -{ - *out_src = *out_dst = 0; - - if (!srci || !dsti) - return; - - MyList *s, *d; - MyList *s2, *d2; - - double llb[dim], uub[dim]; - - s = srci; - while (s) - { - Parallel::gridseg *sd = s->data; - d = dsti; - while (d) - { 
- Parallel::gridseg *dd = d->data; - bool flag = true; - for (int i = 0; i < dim; i++) - { - double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i); - llb[i] = Mymax(sd->llb[i], dd->llb[i]); - uub[i] = Mymin(sd->uub[i], dd->uub[i]); - // make sure the region boundary is consistent to the grids - // here we only judge if the domain is empty, so do not need to adjust the align - double lb = llb[i], ub = uub[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - // ---*--- - // x-------x - // if (int(2*(sd->uub[i]-uub[i])/SH+0.4)%2 == 1) ub = uub[i]-SH/2; - // else if(int(2*(dd->uub[i]-uub[i])/DH+0.4)%2 == 1) ub = uub[i]-DH/2; - // if (int(2*(llb[i]-sd->llb[i])/SH+0.4)%2 == 1) lb = llb[i]+SH/2; - // else if(int(2*(llb[i]-dd->llb[i])/DH+0.4)%2 == 1) lb = llb[i]+DH/2; - if (lb > ub + Mymin(SH, DH) / 2) - { - flag = false; - break; - } // special for isolated point -#else -#ifdef Cell - // |------| - // |-------------| - // if (int(2*(sd->uub[i]-uub[i])/SH+0.4)%2 == 1) ub = uub[i]+SH/2; - // else if(int(2*(dd->uub[i]-uub[i])/DH+0.4)%2 == 1) ub = uub[i]+DH/2; - // |------| - // |-------------| - // if (int(2*(llb[i]-sd->llb[i])/SH+0.4)%2 == 1) lb = llb[i]-SH/2; - // else if(int(2*(llb[i]-dd->llb[i])/DH+0.4)%2 == 1) lb = llb[i]-DH/2; - if (ub - lb < Mymin(SH, DH) / 2) - { - flag = false; - break; - } // even for isolated point, it has a cell belong to it -#else -#error Not define Vertex nor Cell -#endif -#endif - } - - if (flag) - { - if (!(*out_src)) - { - *out_src = s2 = new MyList; - *out_dst = d2 = new MyList; - s2->data = new Parallel::gridseg; - d2->data = new Parallel::gridseg; - } - else - { - s2->next = new MyList; - s2 = s2->next; - d2->next = new MyList; - d2 = d2->next; - s2->data = new Parallel::gridseg; - d2->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i); - s2->data->llb[i] = d2->data->llb[i] = llb[i]; - s2->data->uub[i] = d2->data->uub[i] = uub[i]; -// 
using float method to count point, we do not need following consideration (2012 nov 17) -#if 1 - -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - // old code distuinguish vertex and cell - // if (int(2*(sd->uub[i]-uub[i])/SH+0.4)%2 == 1) s2->data->uub[i] = uub[i]-SH/2; - // else if(int(2*(dd->uub[i]-uub[i])/DH+0.4)%2 == 1) d2->data->uub[i] = uub[i]-DH/2; - // if (int(2*(llb[i]-sd->llb[i])/SH+0.4)%2 == 1) s2->data->llb[i] = llb[i]+SH/2; - // else if(int(2*(llb[i]-dd->llb[i])/DH+0.4)%2 == 1) d2->data->llb[i] = llb[i]+DH/2; - // new code: here we concern much more about missing point, because overlaping domain has been gaureented above - if (int(2 * (sd->uub[i] - uub[i]) / SH + 0.4) % 2 == 1) - s2->data->uub[i] = uub[i] + SH / 2; - else if (int(2 * (dd->uub[i] - uub[i]) / DH + 0.4) % 2 == 1) - d2->data->uub[i] = uub[i] + DH / 2; - if (int(2 * (llb[i] - sd->llb[i]) / SH + 0.4) % 2 == 1) - s2->data->llb[i] = llb[i] - SH / 2; - else if (int(2 * (llb[i] - dd->llb[i]) / DH + 0.4) % 2 == 1) - d2->data->llb[i] = llb[i] - DH / 2; - s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4) + 1; - d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - if (int(2 * (sd->uub[i] - uub[i]) / SH + 0.4) % 2 == 1) - s2->data->uub[i] = uub[i] + SH / 2; - else if (int(2 * (dd->uub[i] - uub[i]) / DH + 0.4) % 2 == 1) - d2->data->uub[i] = uub[i] + DH / 2; - if (int(2 * (llb[i] - sd->llb[i]) / SH + 0.4) % 2 == 1) - s2->data->llb[i] = llb[i] - SH / 2; - else if (int(2 * (llb[i] - dd->llb[i]) / DH + 0.4) % 2 == 1) - d2->data->llb[i] = llb[i] - DH / 2; - s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4); - d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - -#endif - s2->data->illb[i] = sd->illb[i]; - d2->data->illb[i] = dd->illb[i]; - s2->data->iuub[i] = sd->iuub[i]; - d2->data->iuub[i] = 
dd->iuub[i]; - } - s2->data->Bg = sd->Bg; - s2->next = 0; - d2->data->Bg = dd->Bg; - d2->next = 0; - } - d = d->next; - } - s = s->next; - } -} -// PACK: prepare target data in 'data' -// UNPACK: copy target data from 'data' to corresponding numerical grids -int Parallel::data_packer(double *data, MyList *src, MyList *dst, int rank_in, int dir, - MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - int DIM = dim; - - if (dir != PACK && dir != UNPACK) - { - cout << "error dir " << dir << " for data_packer " << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int size_out = 0; - - if (!src || !dst) - return size_out; - - MyList *varls, *varld; - - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - varls = varls->next; - varld = varld->next; - } - - if (varls || varld) - { - cout << "error in short data packer, var lists does not match." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int type; /* 1 copy, 2 restrict, 3 prolong */ - if (src->data->Bg->lev == dst->data->Bg->lev) - type = 1; - else if (src->data->Bg->lev > dst->data->Bg->lev) - type = 2; - else - type = 3; - - while (src && dst) - { - if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || - (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank)) - { - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - if (data) - { - if (dir == PACK) - switch (type) - { - // attention must be paied to the difference between src's llb,uub and dst's llb,uub - case 1: - f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub); - break; - case 2: - f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - src->data->Bg->bbox, src->data->Bg->bbox + dim, 
src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry); - break; - case 3: - f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry); - } - if (dir == UNPACK) // from target data to corresponding grid - f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn], - dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, - dst->data->llb, dst->data->uub); - } - size_out += dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2]; - varls = varls->next; - varld = varld->next; - } - } - dst = dst->next; - src = src->next; - } - - return size_out; -} -int Parallel::data_packermix(double *data, MyList *src, MyList *dst, int rank_in, int dir, - MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - int DIM = dim; - - if (dir != PACK && dir != UNPACK) - { - cout << "Parallel::data_packermix: error dir " << dir << " for data_packermix." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int size_out = 0; - - if (!src || !dst) - return size_out; - - MyList *varls, *varld; - - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - varls = varls->next; - varld = varld->next; - } - - if (varls || varld) - { - cout << "error in short data packer, var lists does not match." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int type; /* 1 copy, 2 restrict, 3 prolong */ - if (src->data->Bg->lev == dst->data->Bg->lev) - type = 1; - else if (src->data->Bg->lev > dst->data->Bg->lev) - type = 2; - else - type = 3; - - if (type != 3) - { - cout << "Parallel::data_packermix: error type " << type << " for data_packermix." 
<< endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - while (src && dst) - { - if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || - (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank)) - { - varls = VarLists; - varld = VarListd; - while (varls && varld) - { - if (data) - { - if (dir == PACK) - f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], - dst->data->llb, dst->data->uub, src->data->shape, data + size_out, - src->data->llb, src->data->uub, varls->data->SoA, Symmetry); - if (dir == UNPACK) // from target data to corresponding grid - f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn], - src->data->llb, src->data->uub, src->data->shape, data + size_out, - dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry, dst->data->illb, dst->data->iuub); - } - // the symmetry problem should be dealt in prolongcopy3, - // so we always have ghost_width for both sides - size_out += (src->data->shape[0] + 2 * ghost_width) * (src->data->shape[1] + 2 * ghost_width) * (src->data->shape[2] + 2 * ghost_width); - varls = varls->next; - varld = varld->next; - } - } - dst = dst->next; - src = src->next; - } - - return size_out; -} -// -void Parallel::transfer(MyList **src, MyList **dst, - MyList *VarList1 /* source */, MyList *VarList2 /*target */, - int Symmetry) -{ - int myrank, cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - int node; - - MPI_Request *reqs; - MPI_Status *stats; - reqs = new MPI_Request[2 * cpusize]; - stats = new MPI_Status[2 * cpusize]; - int req_no = 0; - - double **send_data, **rec_data; - send_data = new double *[cpusize]; - rec_data = new double *[cpusize]; - int length; - - for (node = 0; node < cpusize; node++) - { - send_data[node] = rec_data[node] = 0; - if (node == myrank) - { - if 
(length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry)) - { - rec_data[node] = new double[length]; - if (!rec_data[node]) - { - cout << "out of memory when new in short transfer, place 1" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - } - } - else - { - // send from this cpu to cpu#node - if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry)) - { - send_data[node] = new double[length]; - if (!send_data[node]) - { - cout << "out of memory when new in short transfer, place 2" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++); - } - // receive from cpu#node to this cpu - if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry)) - { - rec_data[node] = new double[length]; - if (!rec_data[node]) - { - cout << "out of memory when new in short transfer, place 3" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++); - } - } - } - // wait for all requests to complete - MPI_Waitall(req_no, reqs, stats); - - for (node = 0; node < cpusize; node++) - if (rec_data[node]) - data_packer(rec_data[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry); - - for (node = 0; node < cpusize; node++) - { - if (send_data[node]) - delete[] send_data[node]; - if (rec_data[node]) - delete[] rec_data[node]; - } - - delete[] reqs; - delete[] stats; - delete[] send_data; - delete[] rec_data; -} -// -void Parallel::transfermix(MyList **src, MyList **dst, - MyList *VarList1 /* source */, MyList *VarList2 /*target */, - int Symmetry) -{ - int myrank, cpusize; - MPI_Comm_size(MPI_COMM_WORLD, 
&cpusize); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - int node; - - MPI_Request *reqs; - MPI_Status *stats; - reqs = new MPI_Request[2 * cpusize]; - stats = new MPI_Status[2 * cpusize]; - int req_no = 0; - - double **send_data, **rec_data; - send_data = new double *[cpusize]; - rec_data = new double *[cpusize]; - int length; - - for (node = 0; node < cpusize; node++) - { - send_data[node] = rec_data[node] = 0; - if (node == myrank) - { - if (length = data_packermix(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry)) - { - rec_data[node] = new double[length]; - if (!rec_data[node]) - { - cout << "out of memory when new in short transfer, place 1" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - data_packermix(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - } - } - else - { - // send from this cpu to cpu#node - if (length = data_packermix(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry)) - { - send_data[node] = new double[length]; - if (!send_data[node]) - { - cout << "out of memory when new in short transfer, place 2" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - data_packermix(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++); - } - // receive from cpu#node to this cpu - if (length = data_packermix(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry)) - { - rec_data[node] = new double[length]; - if (!rec_data[node]) - { - cout << "out of memory when new in short transfer, place 3" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++); - } - } - } - // wait for all requests to complete - MPI_Waitall(req_no, reqs, stats); - - for (node = 0; node < cpusize; node++) - if (rec_data[node]) - data_packermix(rec_data[node], src[node], dst[node], node, 
UNPACK, VarList1, VarList2, Symmetry); - - for (node = 0; node < cpusize; node++) - { - if (send_data[node]) - delete[] send_data[node]; - if (rec_data[node]) - delete[] rec_data[node]; - } - - delete[] reqs; - delete[] stats; - delete[] send_data; - delete[] rec_data; -} -void Parallel::Sync(Patch *Pat, MyList *VarList, int Symmetry) -{ - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_ghost_gsl(Pat); // ghost region only - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl0(Pat, node); // for the part without ghost points and do not extend - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer_src[node], data locate on cpu#node; - // but for transfer_dst[node] the data may locate on any node - } - - transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; -} -void Parallel::Sync(MyList *PatL, MyList *VarList, int Symmetry) -{ - // Patch inner Synch - MyList *Pp = PatL; - while (Pp) - { - Sync(Pp->data, VarList, Symmetry); - Pp = Pp->next; - } - - // Patch inter Synch - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_buffer_gsl(PatL); // buffer region only - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl(PatL, node, 5, Symmetry); // for the part without 
ghost nor buffer points and do not extend - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; -} -// Merged Sync: collect all intra-patch and inter-patch grid segment lists, -// then issue a single transfer() call instead of N+1 separate ones. -void Parallel::Sync_merged(MyList *PatL, MyList *VarList, int Symmetry) -{ - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList **combined_src = new MyList *[cpusize]; - MyList **combined_dst = new MyList *[cpusize]; - for (int node = 0; node < cpusize; node++) - combined_src[node] = combined_dst[node] = 0; - - // Phase A: Intra-patch ghost exchange segments - MyList *Pp = PatL; - while (Pp) - { - Patch *Pat = Pp->data; - MyList *dst_ghost = build_ghost_gsl(Pat); - - for (int node = 0; node < cpusize; node++) - { - MyList *src_owned = build_owned_gsl0(Pat, node); - MyList *tsrc = 0, *tdst = 0; - build_gstl(src_owned, dst_ghost, &tsrc, &tdst); - - if (tsrc) - { - if (combined_src[node]) - combined_src[node]->catList(tsrc); - else - combined_src[node] = tsrc; - } - if (tdst) - { - if (combined_dst[node]) - combined_dst[node]->catList(tdst); - else - combined_dst[node] = tdst; - } - - if (src_owned) - src_owned->destroyList(); - } - - if (dst_ghost) - dst_ghost->destroyList(); - - Pp = Pp->next; - } - - // Phase B: Inter-patch buffer exchange segments - MyList *dst_buffer = build_buffer_gsl(PatL); - for (int node = 0; node < cpusize; node++) - { - MyList *src_owned = build_owned_gsl(PatL, node, 5, Symmetry); - MyList *tsrc = 0, *tdst = 0; - 
build_gstl(src_owned, dst_buffer, &tsrc, &tdst); - - if (tsrc) - { - if (combined_src[node]) - combined_src[node]->catList(tsrc); - else - combined_src[node] = tsrc; - } - if (tdst) - { - if (combined_dst[node]) - combined_dst[node]->catList(tdst); - else - combined_dst[node] = tdst; - } - - if (src_owned) - src_owned->destroyList(); - } - if (dst_buffer) - dst_buffer->destroyList(); - - // Phase C: Single transfer - transfer(combined_src, combined_dst, VarList, VarList, Symmetry); - - // Phase D: Cleanup - for (int node = 0; node < cpusize; node++) - { - if (combined_src[node]) - combined_src[node]->destroyList(); - if (combined_dst[node]) - combined_dst[node]->destroyList(); - } - delete[] combined_src; - delete[] combined_dst; -} -// SyncCache constructor -Parallel::SyncCache::SyncCache() - : valid(false), cpusize(0), combined_src(0), combined_dst(0), - send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0), - send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0), - lengths_valid(false) -{ -} -// SyncCache invalidate: free grid segment lists but keep buffers -void Parallel::SyncCache::invalidate() -{ - if (!valid) - return; - for (int i = 0; i < cpusize; i++) - { - if (combined_src[i]) - combined_src[i]->destroyList(); - if (combined_dst[i]) - combined_dst[i]->destroyList(); - combined_src[i] = combined_dst[i] = 0; - send_lengths[i] = recv_lengths[i] = 0; - } - valid = false; - lengths_valid = false; -} -// SyncCache destroy: free everything -void Parallel::SyncCache::destroy() -{ - invalidate(); - if (combined_src) delete[] combined_src; - if (combined_dst) delete[] combined_dst; - if (send_lengths) delete[] send_lengths; - if (recv_lengths) delete[] recv_lengths; - if (send_buf_caps) delete[] send_buf_caps; - if (recv_buf_caps) delete[] recv_buf_caps; - for (int i = 0; i < cpusize; i++) - { - if (send_bufs && send_bufs[i]) delete[] send_bufs[i]; - if (recv_bufs && recv_bufs[i]) delete[] recv_bufs[i]; - } - if (send_bufs) delete[] send_bufs; 
- if (recv_bufs) delete[] recv_bufs; - if (reqs) delete[] reqs; - if (stats) delete[] stats; - combined_src = combined_dst = 0; - send_lengths = recv_lengths = 0; - send_buf_caps = recv_buf_caps = 0; - send_bufs = recv_bufs = 0; - reqs = 0; stats = 0; - cpusize = 0; max_reqs = 0; -} -// transfer_cached: reuse pre-allocated buffers from SyncCache -void Parallel::transfer_cached(MyList **src, MyList **dst, - MyList *VarList1, MyList *VarList2, - int Symmetry, SyncCache &cache) -{ - int myrank; - MPI_Comm_size(MPI_COMM_WORLD, &cache.cpusize); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - int cpusize = cache.cpusize; - - int req_no = 0; - int node; - - for (node = 0; node < cpusize; node++) - { - if (node == myrank) - { - int length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - cache.recv_lengths[node] = length; - if (length > 0) - { - if (length > cache.recv_buf_caps[node]) - { - if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; - cache.recv_bufs[node] = new double[length]; - cache.recv_buf_caps[node] = length; - } - data_packer(cache.recv_bufs[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - } - } - else - { - // send - int slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - cache.send_lengths[node] = slength; - if (slength > 0) - { - if (slength > cache.send_buf_caps[node]) - { - if (cache.send_bufs[node]) delete[] cache.send_bufs[node]; - cache.send_bufs[node] = new double[slength]; - cache.send_buf_caps[node] = slength; - } - data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++); - } - // recv - int rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry); - cache.recv_lengths[node] = rlength; - if (rlength > 0) - { - if (rlength > 
cache.recv_buf_caps[node]) - { - if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; - cache.recv_bufs[node] = new double[rlength]; - cache.recv_buf_caps[node] = rlength; - } - MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++); - } - } - } - - MPI_Waitall(req_no, cache.reqs, cache.stats); - - for (node = 0; node < cpusize; node++) - if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0) - data_packer(cache.recv_bufs[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry); -} -// Sync_cached: build grid segment lists on first call, reuse on subsequent calls -void Parallel::Sync_cached(MyList *PatL, MyList *VarList, int Symmetry, SyncCache &cache) -{ - if (!cache.valid) - { - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - cache.cpusize = cpusize; - - // Allocate cache arrays if needed - if (!cache.combined_src) - { - cache.combined_src = new MyList *[cpusize]; - cache.combined_dst = new MyList *[cpusize]; - cache.send_lengths = new int[cpusize]; - cache.recv_lengths = new int[cpusize]; - cache.send_bufs = new double *[cpusize]; - cache.recv_bufs = new double *[cpusize]; - cache.send_buf_caps = new int[cpusize]; - cache.recv_buf_caps = new int[cpusize]; - for (int i = 0; i < cpusize; i++) - { - cache.send_bufs[i] = cache.recv_bufs[i] = 0; - cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; - } - cache.max_reqs = 2 * cpusize; - cache.reqs = new MPI_Request[cache.max_reqs]; - cache.stats = new MPI_Status[cache.max_reqs]; - } - - for (int node = 0; node < cpusize; node++) - { - cache.combined_src[node] = cache.combined_dst[node] = 0; - cache.send_lengths[node] = cache.recv_lengths[node] = 0; - } - - // Build intra-patch segments (same as Sync_merged Phase A) - MyList *Pp = PatL; - while (Pp) - { - Patch *Pat = Pp->data; - MyList *dst_ghost = build_ghost_gsl(Pat); - for (int node = 0; node < cpusize; node++) - { - MyList *src_owned = build_owned_gsl0(Pat, node); - MyList 
*tsrc = 0, *tdst = 0; - build_gstl(src_owned, dst_ghost, &tsrc, &tdst); - if (tsrc) - { - if (cache.combined_src[node]) - cache.combined_src[node]->catList(tsrc); - else - cache.combined_src[node] = tsrc; - } - if (tdst) - { - if (cache.combined_dst[node]) - cache.combined_dst[node]->catList(tdst); - else - cache.combined_dst[node] = tdst; - } - if (src_owned) src_owned->destroyList(); - } - if (dst_ghost) dst_ghost->destroyList(); - Pp = Pp->next; - } - - // Build inter-patch segments (same as Sync_merged Phase B) - MyList *dst_buffer = build_buffer_gsl(PatL); - for (int node = 0; node < cpusize; node++) - { - MyList *src_owned = build_owned_gsl(PatL, node, 5, Symmetry); - MyList *tsrc = 0, *tdst = 0; - build_gstl(src_owned, dst_buffer, &tsrc, &tdst); - if (tsrc) - { - if (cache.combined_src[node]) - cache.combined_src[node]->catList(tsrc); - else - cache.combined_src[node] = tsrc; - } - if (tdst) - { - if (cache.combined_dst[node]) - cache.combined_dst[node]->catList(tdst); - else - cache.combined_dst[node] = tdst; - } - if (src_owned) src_owned->destroyList(); - } - if (dst_buffer) dst_buffer->destroyList(); - - cache.valid = true; - } - - // Use cached lists with buffer-reusing transfer - transfer_cached(cache.combined_src, cache.combined_dst, VarList, VarList, Symmetry, cache); -} -// Sync_start: pack and post MPI_Isend/Irecv, return immediately -void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetry, - SyncCache &cache, AsyncSyncState &state) -{ - // Ensure cache is built - if (!cache.valid) - { - // Build cache (same logic as Sync_cached) - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - cache.cpusize = cpusize; - - if (!cache.combined_src) - { - cache.combined_src = new MyList *[cpusize]; - cache.combined_dst = new MyList *[cpusize]; - cache.send_lengths = new int[cpusize]; - cache.recv_lengths = new int[cpusize]; - cache.send_bufs = new double *[cpusize]; - cache.recv_bufs = new double *[cpusize]; - cache.send_buf_caps = new 
int[cpusize]; - cache.recv_buf_caps = new int[cpusize]; - for (int i = 0; i < cpusize; i++) - { - cache.send_bufs[i] = cache.recv_bufs[i] = 0; - cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; - } - cache.max_reqs = 2 * cpusize; - cache.reqs = new MPI_Request[cache.max_reqs]; - cache.stats = new MPI_Status[cache.max_reqs]; - } - - for (int node = 0; node < cpusize; node++) - { - cache.combined_src[node] = cache.combined_dst[node] = 0; - cache.send_lengths[node] = cache.recv_lengths[node] = 0; - } - - MyList *Pp = PatL; - while (Pp) - { - Patch *Pat = Pp->data; - MyList *dst_ghost = build_ghost_gsl(Pat); - for (int node = 0; node < cpusize; node++) - { - MyList *src_owned = build_owned_gsl0(Pat, node); - MyList *tsrc = 0, *tdst = 0; - build_gstl(src_owned, dst_ghost, &tsrc, &tdst); - if (tsrc) - { - if (cache.combined_src[node]) - cache.combined_src[node]->catList(tsrc); - else - cache.combined_src[node] = tsrc; - } - if (tdst) - { - if (cache.combined_dst[node]) - cache.combined_dst[node]->catList(tdst); - else - cache.combined_dst[node] = tdst; - } - if (src_owned) src_owned->destroyList(); - } - if (dst_ghost) dst_ghost->destroyList(); - Pp = Pp->next; - } - - MyList *dst_buffer = build_buffer_gsl(PatL); - for (int node = 0; node < cpusize; node++) - { - MyList *src_owned = build_owned_gsl(PatL, node, 5, Symmetry); - MyList *tsrc = 0, *tdst = 0; - build_gstl(src_owned, dst_buffer, &tsrc, &tdst); - if (tsrc) - { - if (cache.combined_src[node]) - cache.combined_src[node]->catList(tsrc); - else - cache.combined_src[node] = tsrc; - } - if (tdst) - { - if (cache.combined_dst[node]) - cache.combined_dst[node]->catList(tdst); - else - cache.combined_dst[node] = tdst; - } - if (src_owned) src_owned->destroyList(); - } - if (dst_buffer) dst_buffer->destroyList(); - cache.valid = true; - } - - // Now pack and post async MPI operations - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - int cpusize = cache.cpusize; - state.req_no = 0; - state.active = true; - - 
MyList **src = cache.combined_src; - MyList **dst = cache.combined_dst; - - for (int node = 0; node < cpusize; node++) - { - if (node == myrank) - { - int length; - if (!cache.lengths_valid) { - length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); - cache.recv_lengths[node] = length; - } else { - length = cache.recv_lengths[node]; - } - if (length > 0) - { - if (length > cache.recv_buf_caps[node]) - { - if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; - cache.recv_bufs[node] = new double[length]; - cache.recv_buf_caps[node] = length; - } - data_packer(cache.recv_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); - } - } - else - { - int slength; - if (!cache.lengths_valid) { - slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); - cache.send_lengths[node] = slength; - } else { - slength = cache.send_lengths[node]; - } - if (slength > 0) - { - if (slength > cache.send_buf_caps[node]) - { - if (cache.send_bufs[node]) delete[] cache.send_bufs[node]; - cache.send_bufs[node] = new double[slength]; - cache.send_buf_caps[node] = slength; - } - data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); - MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++); - } - int rlength; - if (!cache.lengths_valid) { - rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry); - cache.recv_lengths[node] = rlength; - } else { - rlength = cache.recv_lengths[node]; - } - if (rlength > 0) - { - if (rlength > cache.recv_buf_caps[node]) - { - if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; - cache.recv_bufs[node] = new double[rlength]; - cache.recv_buf_caps[node] = rlength; - } - MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++); - } - } - } - cache.lengths_valid = 
true; -} -// Sync_finish: wait for async MPI operations and unpack -void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state, - MyList *VarList, int Symmetry) -{ - if (!state.active) - return; - - MPI_Waitall(state.req_no, cache.reqs, cache.stats); - - int cpusize = cache.cpusize; - MyList **src = cache.combined_src; - MyList **dst = cache.combined_dst; - - for (int node = 0; node < cpusize; node++) - if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0) - data_packer(cache.recv_bufs[node], src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry); - - state.active = false; -} -// collect buffer grid segments or blocks for the periodic boundary condition of given patch -// --------------------------------------------------- -// |con | |con | -// |ner | PhysBD |ner | -// |-------------------------------------------------| -// | | | | -// |Phy | |Phy | -// |sBD | |BD | -// | | | | -// | | | | -// | | | | -// |-------------------------------------------------| -// |con | PhysBD |con | -// |ner | |ner | -// --------------------------------------------------- -// first order derivetive does not need conner information, -// but second order derivative needs! 
-/* the following code does not include conner part -MyList *Parallel::build_PhysBD_gsl(Patch *Pat) -{ - MyList *cgsl,*gsc,*gsb=0,*p; - gsc = build_ghost_gsl(Pat); - for(int i=0;idata->Bg->getdX(i); -// lower boundary - if(gsb) - { - p = new MyList; - p->data = new Parallel::gridseg; - p->next=gsb; - gsb=p; - } - else - { - gsb = new MyList; - gsb->data = new Parallel::gridseg; - gsb->next=0; - } - for(int j=0;jdata->llb[i] = Pat->bbox[i]-ghost_width*DH; - gsb->data->uub[i] = Pat->bbox[i]-DH; -#else -#ifdef Cell - gsb->data->llb[i] = Pat->bbox[i]-ghost_width*DH; - gsb->data->uub[i] = Pat->bbox[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - gsb->data->shape[i] = ghost_width; - } - else - { - gsb->data->llb[j] = Pat->bbox[j]; - gsb->data->uub[j] = Pat->bbox[j+dim]; - gsb->data->shape[j] = Pat->shape[j]; - } - } - gsb->data->Bg = 0; //vertual grid segment -// upper boundary - p = new MyList; - p->data = new Parallel::gridseg; - p->next=gsb; - gsb=p; - for(int j=0;jdata->llb[i] = Pat->bbox[i+dim]+DH; - gsb->data->uub[i] = Pat->bbox[i+dim]+ghost_width*DH; -#else -#ifdef Cell - gsb->data->llb[i] = Pat->bbox[i+dim]; - gsb->data->uub[i] = Pat->bbox[i+dim]+ghost_width*DH; -#else -#error Not define Vertex nor Cell -#endif -#endif - gsb->data->shape[i] = ghost_width; - } - else - { - gsb->data->llb[j] = Pat->bbox[j]; - gsb->data->uub[j] = Pat->bbox[j+dim]; - gsb->data->shape[j] = Pat->shape[j]; - } - } - gsb->data->Bg = 0; //vertual grid segment - } - - cgsl = gsl_and(gsc,gsb); - - gsc->destroyList(); - gsb->destroyList(); - - return cgsl; -} -*/ -// the following code includes conner part -MyList *Parallel::build_PhysBD_gsl(Patch *Pat) -{ - MyList *cgsl, *gsc, *gsb = 0, *p; - - gsc = build_complete_gsl(Pat); - - gsb = new MyList; - gsb->data = new Parallel::gridseg; - gsb->next = 0; - gsb->data->Bg = 0; - - for (int j = 0; j < dim; j++) - { - gsb->data->llb[j] = Pat->bbox[j]; - gsb->data->uub[j] = Pat->bbox[j + dim]; - gsb->data->shape[j] = Pat->shape[j]; - 
} - - p = gsl_subtract(gsc, gsb); - - gsc->destroyList(); - gsb->destroyList(); - - cgsl = divide_gsl(p, Pat); - - p->destroyList(); - - return cgsl; -} -MyList *Parallel::divide_gsl(MyList *p, Patch *Pat) -{ - MyList *cgsl = 0; - while (p) - { - if (cgsl) - cgsl->catList(divide_gs(p, Pat)); - else - cgsl = divide_gs(p, Pat); - p = p->next; - } - - return cgsl; -} -// divide the gs into pices which locate either totally outside of the given Patch coordinate range -// or totally inside it. It's usefull for periodic boundary condition -MyList *Parallel::divide_gs(MyList *p, Patch *Pat) -{ - double DH[dim]; - for (int i = 0; i < dim; i++) - { - DH[i] = p->data->Bg->getdX(i); - } - - int num[dim]; - double llb[3][dim], uub[3][dim]; - for (int i = 0; i < dim; i++) - { - if (p->data->llb[i] < Pat->bbox[i] - DH[i] / 2) - { - if (p->data->uub[i] > Pat->bbox[i + dim] + DH[i] / 2) - { - num[i] = 3; - llb[0][i] = p->data->llb[i]; - llb[1][i] = Pat->bbox[i]; - uub[1][i] = Pat->bbox[i + dim]; - uub[2][i] = p->data->uub[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - uub[0][i] = Pat->bbox[i] - DH[i]; - llb[2][i] = Pat->bbox[i + dim] + DH[i]; -#else -#ifdef Cell - uub[0][i] = Pat->bbox[i]; - llb[2][i] = Pat->bbox[i + dim]; -#else -#error Not define Vertex nor Cell -#endif -#endif - } - else if (p->data->uub[i] > Pat->bbox[i] + DH[i] / 2) - { - num[i] = 2; - llb[0][i] = p->data->llb[i]; - llb[1][i] = Pat->bbox[i]; - uub[1][i] = p->data->uub[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - uub[0][i] = Pat->bbox[i] - DH[i]; -#else -#ifdef Cell - uub[0][i] = Pat->bbox[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - } - else - { - num[i] = 1; - llb[0][i] = p->data->llb[i]; - uub[0][i] = p->data->uub[i]; - } - } - else if (p->data->llb[i] < Pat->bbox[i + dim] - DH[i] / 2) - { - if (p->data->uub[i] > Pat->bbox[i + dim] + DH[i] / 2) - { - num[i] = 2; - llb[0][i] = p->data->llb[i]; - uub[0][i] = 
Pat->bbox[i + dim]; - uub[1][i] = p->data->uub[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - llb[1][i] = Pat->bbox[i + dim] + DH[i]; -#else -#ifdef Cell - llb[1][i] = Pat->bbox[i + dim]; -#else -#error Not define Vertex nor Cell -#endif -#endif - } - else - { - num[i] = 1; - llb[0][i] = p->data->llb[i]; - uub[0][i] = p->data->uub[i]; - } - } - else - { - num[i] = 1; - llb[0][i] = p->data->llb[i]; - uub[0][i] = p->data->uub[i]; - } - } - MyList *cgsl = 0, *gg; - int NN = 1; - for (int i = 0; i < dim; i++) - NN = NN * num[i]; - - for (int i = 0; i < NN; i++) - { - int ind[dim]; - getarrayindex(dim, num, ind, i); - gg = clone_gsl(p, true); - for (int k = 0; k < dim; k++) - { - gg->data->llb[k] = llb[ind[k]][k]; - gg->data->uub[k] = uub[ind[k]][k]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gg->data->shape[k] = int((uub[ind[k]][k] - llb[ind[k]][k]) / DH[k] + 0.4) + 1; -#else -#ifdef Cell - gg->data->shape[k] = int((uub[ind[k]][k] - llb[ind[k]][k]) / DH[k] + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - - if (cgsl) - cgsl->catList(gg); - else - cgsl = gg; - } - - return cgsl; -} -// after mod operation, according to overlape to determine real grid segments -void Parallel::build_PhysBD_gstl(Patch *Pat, MyList *srci, MyList *dsti, - MyList **out_src, MyList **out_dst) -{ - *out_src = *out_dst = 0; - - if (!srci || !dsti) - return; - - MyList *s, *d; - MyList *s2, *d2; - - double llb[dim], uub[dim]; - - s = srci; - while (s) - { - Parallel::gridseg *sd = s->data; - d = dsti; - while (d) - { - Parallel::gridseg *dd = d->data; - bool flag = true; - for (int i = 0; i < dim; i++) - { - double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i); - if (!feq(SH, DH, SH / 2)) - { - cout << "Parallel::build_PhysBD_gstl meets different grid space SH = " << SH << ", DH = " << DH << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - // we assume dst and src locate on the same Patch - if (dd->llb[i] < 
Pat->bbox[i]) - llb[i] = Mymax(sd->llb[i], dd->llb[i] + Pat->bbox[dim + i] - Pat->bbox[i]); - else if (dd->llb[i] > Pat->bbox[i + dim]) - llb[i] = Mymax(sd->llb[i], dd->llb[i] - Pat->bbox[dim + i] + Pat->bbox[i]); - else - llb[i] = Mymax(sd->llb[i], dd->llb[i]); - - if (dd->uub[i] < Pat->bbox[i]) - uub[i] = Mymin(sd->uub[i], dd->uub[i] + Pat->bbox[dim + i] - Pat->bbox[i]); - else if (dd->uub[i] > Pat->bbox[dim + i]) - uub[i] = Mymin(sd->uub[i], dd->uub[i] - Pat->bbox[dim + i] + Pat->bbox[i]); - else - uub[i] = Mymin(sd->uub[i], dd->uub[i]); -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - if (llb[i] > uub[i] + SH / 2) - { - flag = false; - break; - } // special for isolated point -#else -#ifdef Cell - if (llb[i] > uub[i]) - { - flag = false; - break; - } -#else -#error Not define Vertex nor Cell -#endif -#endif - } - - if (flag) - { - if (!(*out_src)) - { - *out_src = s2 = new MyList; - *out_dst = d2 = new MyList; - s2->data = new Parallel::gridseg; - d2->data = new Parallel::gridseg; - } - else - { - s2->next = new MyList; - s2 = s2->next; - d2->next = new MyList; - d2 = d2->next; - s2->data = new Parallel::gridseg; - d2->data = new Parallel::gridseg; - } - - for (int i = 0; i < dim; i++) - { - double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i); - s2->data->llb[i] = llb[i]; - s2->data->uub[i] = uub[i]; - - if (dd->llb[i] < Pat->bbox[i]) - d2->data->llb[i] = llb[i] - Pat->bbox[dim + i] + Pat->bbox[i]; - else if (dd->llb[i] > Pat->bbox[i + dim]) - d2->data->llb[i] = llb[i] + Pat->bbox[dim + i] - Pat->bbox[i]; - else - d2->data->llb[i] = llb[i]; - - if (dd->uub[i] < Pat->bbox[i]) - d2->data->uub[i] = uub[i] - Pat->bbox[dim + i] + Pat->bbox[i]; - else if (dd->uub[i] > Pat->bbox[dim + i]) - d2->data->uub[i] = uub[i] + Pat->bbox[dim + i] - Pat->bbox[i]; - else - d2->data->uub[i] = uub[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4) 
+ 1; - d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4); - d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - s2->data->Bg = sd->Bg; - s2->next = 0; - d2->data->Bg = dd->Bg; - d2->next = 0; - } - d = d->next; - } - s = s->next; - } -} -void Parallel::PeriodicBD(Patch *Pat, MyList *VarList, int Symmetry) -{ - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_PhysBD_gsl(Pat); - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl0(Pat, node); // for the part without ghost points and do not extend - build_PhysBD_gstl(Pat, src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; -} -double Parallel::L2Norm(Patch *Pat, var *vf) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - double tvf, dtvf = 0; - int BDW = ghost_width; - - MyList *BP = Pat->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_l2normhelper(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pat->bbox[0], Pat->bbox[1], Pat->bbox[2], - Pat->bbox[3], Pat->bbox[4], Pat->bbox[5], - cg->fgfs[vf->sgfn], tvf, BDW); - dtvf += tvf; - } - if (BP == Pat->ble) - break; - BP = BP->next; - } - - 
MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - tvf = sqrt(tvf); - - return tvf; -} -double Parallel::L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - double tvf, dtvf = 0; - int BDW = ghost_width; - - MyList *BP = Pat->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_l2normhelper(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pat->bbox[0], Pat->bbox[1], Pat->bbox[2], - Pat->bbox[3], Pat->bbox[4], Pat->bbox[5], - cg->fgfs[vf->sgfn], tvf, BDW); - dtvf += tvf; - } - if (BP == Pat->ble) - break; - BP = BP->next; - } - - MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, Comm_here); - - tvf = sqrt(tvf); - - return tvf; -} -void Parallel::checkgsl(MyList *pp, bool first_only) -{ - int myrank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == 0) - { - if (!pp) - cout << " Parallel::checkgsl meets empty gsl" << endl; - while (pp) - { - if (pp->data->Bg) - cout << " on node#" << pp->data->Bg->rank << endl; - else - cout << " virtual grid segment" << endl; - cout << " shape: ("; - for (int i = 0; i < dim; i++) - { - if (i < dim - 1) - cout << pp->data->shape[i] << ","; - else - cout << pp->data->shape[i] << ")" << endl; - } - cout << " range: ("; - for (int i = 0; i < dim; i++) - { - if (i < dim - 1) - cout << pp->data->llb[i] << ":" << pp->data->uub[i] << ","; - else - cout << pp->data->llb[i] << ":" << pp->data->uub[i] << ")" << endl; - } - if (first_only) - return; - pp = pp->next; - } - } -} -void Parallel::checkvarl(MyList *pp, bool first_only) -{ - int myrank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == 0) - { - while (pp) - { - cout << "name: " << pp->data->name << endl; - cout << "SoA = (" << pp->data->SoA[0] << "," << pp->data->SoA[1] << "," << pp->data->SoA[2] << ")" << endl; - cout << "sgfn = " << pp->data->sgfn << endl; - if (first_only) - return; - pp = pp->next; - } - } -} -void Parallel::prepare_inter_time_level(MyList 
*PatL, - MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, - MyList *VarList3 /* target (t+a*dt) */, int tindex) -{ - while (PatL) - { - prepare_inter_time_level(PatL->data, VarList1, VarList2, VarList3, tindex); - PatL = PatL->next; - } -} -void Parallel::prepare_inter_time_level(Patch *Pat, - MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, - MyList *VarList3 /* target (t+a*dt) */, int tindex) -{ - int myrank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - MyList *varl1; - MyList *varl2; - MyList *varl3; - - MyList *BP = Pat->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - varl1 = VarList1; - varl2 = VarList2; - varl3 = VarList3; - while (varl1) - { - if (tindex == 0) - f_average(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], cg->fgfs[varl3->data->sgfn]); - else if (tindex == 1) - f_average3(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], cg->fgfs[varl3->data->sgfn]); - else if (tindex == -1) - // just change data order to use average3 - f_average3(cg->shape, cg->fgfs[varl2->data->sgfn], cg->fgfs[varl1->data->sgfn], cg->fgfs[varl3->data->sgfn]); - else - { - cout << "error tindex in Parallel::prepare_inter_time_level" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - varl1 = varl1->next; - varl2 = varl2->next; - varl3 = varl3->next; - } - } - if (BP == Pat->ble) - break; - BP = BP->next; - } -} -void Parallel::prepare_inter_time_level(MyList *PatL, - MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, - MyList *VarList3 /* source (t-dt) */, MyList *VarList4 /* target (t+a*dt) */, int tindex) -{ - while (PatL) - { - prepare_inter_time_level(PatL->data, VarList1, VarList2, VarList3, VarList4, tindex); - PatL = PatL->next; - } -} -void Parallel::prepare_inter_time_level(Patch *Pat, - MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, - MyList *VarList3 /* source (t-dt) */, MyList *VarList4 /* target 
(t+a*dt) */, int tindex) -{ - int myrank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - MyList *varl1; - MyList *varl2; - MyList *varl3; - MyList *varl4; - - MyList *BP = Pat->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - varl1 = VarList1; - varl2 = VarList2; - varl3 = VarList3; - varl4 = VarList4; - while (varl1) - { - if (tindex == 0) - f_average2(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], - cg->fgfs[varl3->data->sgfn], cg->fgfs[varl4->data->sgfn]); - else if (tindex == 1) - f_average2p(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], - cg->fgfs[varl3->data->sgfn], cg->fgfs[varl4->data->sgfn]); - else if (tindex == -1) - f_average2m(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], - cg->fgfs[varl3->data->sgfn], cg->fgfs[varl4->data->sgfn]); - else - { - cout << "error tindex in long cgh::prepare_inter_time_level" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - varl1 = varl1->next; - varl2 = varl2->next; - varl3 = varl3->next; - varl4 = varl4->next; - } - } - if (BP == Pat->ble) - break; - BP = BP->next; - } -} -void Parallel::Prolong(Patch *Patc, Patch *Patf, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry) -{ - if (Patc->lev >= Patf->lev) - { - cout << "Parallel::Prolong: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_complete_gsl(Patf); // including ghost - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl4(Patc, node, Symmetry); // - buffer - ghost - BD ghost - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - 
transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; -} -void Parallel::Restrict(MyList *PatcL, MyList *PatfL, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry) -{ - if (PatcL->data->lev >= PatfL->data->lev) - { - cout << "Parallel::Restrict: meet requst of Restrict from lev#" << PatfL->data->lev << " to lev#" << PatcL->data->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_complete_gsl(PatcL); // including ghost - for (int node = 0; node < cpusize; node++) - { -#if 0 -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - src[node]=build_owned_gsl(PatfL,node,2,Symmetry); // - buffer - ghost -#else -#ifdef Cell - src[node]=build_owned_gsl(PatfL,node,4,Symmetry); // - buffer - ghost - BD ghost -#else -#error Not define Vertex nor Cell -#endif -#endif -#else - // it seems bam always use this - src[node] = build_owned_gsl(PatfL, node, 2, Symmetry); // - buffer - ghost -#endif - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - delete[] 
src; - delete[] transfer_src; - delete[] transfer_dst; -} -void Parallel::Restrict_after(MyList *PatcL, MyList *PatfL, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry) -{ - if (PatcL->data->lev >= PatfL->data->lev) - { - cout << "Parallel::Restrict: meet requst of Restrict from lev#" << PatfL->data->lev << " to lev#" << PatcL->data->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_complete_gsl(PatcL); // including ghost - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl(PatfL, node, 3, Symmetry); // - ghost - BD ghost - - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; -} -// for the same time level -void Parallel::OutBdLow2Hi(Patch *Patc, Patch *Patf, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry) -{ - if (Patc->lev >= Patf->lev) - { - cout << "Parallel::OutBdLow2Hi: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_buffer_gsl(Patf); // buffer region 
only - - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl4(Patc, node, Symmetry); // - buffer - ghost - BD ghost - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; -} -void Parallel::OutBdLow2Hi(MyList *PatcL, MyList *PatfL, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry) -{ - MyList *Pp, *Ppc; - Ppc = PatcL; - while (Ppc) - { - Pp = PatfL; - while (Pp) - { - if (Ppc->data->lev >= Pp->data->lev) - { - cout << "Parallel::OutBdLow2Hi(list): meet requst of Prolong from lev#" << Ppc->data->lev << " to lev#" << Pp->data->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - Pp = Pp->next; - } - Ppc = Ppc->next; - } - - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_buffer_gsl(PatfL); // buffer region only - - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl(PatcL, node, 4, Symmetry); // - buffer - ghost - BD ghost - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - 
transfer_dst[node]->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; -} -// for the same time level -void Parallel::OutBdLow2Himix(Patch *Patc, Patch *Patf, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry) -{ - if (Patc->lev >= Patf->lev) - { - cout << "Parallel::OutBdLow2Himix: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_buffer_gsl(Patf); // buffer region only - - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl4(Patc, node, Symmetry); // - buffer - ghost - BD ghost - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - transfermix(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; - - // do not need this, we have done after calling of this routine in ProlongRestrict or RestrictProlong - // Sync(Patf,VarList2,Symmetry); // fine level points may be not enough for interpolation -} -void Parallel::OutBdLow2Himix(MyList *PatcL, MyList *PatfL, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry) -{ - MyList *Pp, *Ppc; - Ppc = PatcL; - while (Ppc) - { - Pp = PatfL; - while (Pp) - { - if (Ppc->data->lev >= Pp->data->lev) - { - cout << "Parallel::OutBdLow2Himix(list): meet requst of Prolong from lev#" << Ppc->data->lev << " to lev#" 
<< Pp->data->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - Pp = Pp->next; - } - Ppc = Ppc->next; - } - - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_buffer_gsl(PatfL); // buffer region only - - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl(PatcL, node, 4, Symmetry); // - buffer - ghost - BD ghost - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - transfermix(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); - - if (dst) - dst->destroyList(); - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; -} - -// Restrict_cached: cache grid segment lists, reuse buffers via transfer_cached -void Parallel::Restrict_cached(MyList *PatcL, MyList *PatfL, - MyList *VarList1, MyList *VarList2, - int Symmetry, SyncCache &cache) -{ - if (!cache.valid) - { - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - cache.cpusize = cpusize; - - if (!cache.combined_src) - { - cache.combined_src = new MyList *[cpusize]; - cache.combined_dst = new MyList *[cpusize]; - cache.send_lengths = new int[cpusize]; - cache.recv_lengths = new int[cpusize]; - cache.send_bufs = new double *[cpusize]; - cache.recv_bufs = new double *[cpusize]; - cache.send_buf_caps = new int[cpusize]; - cache.recv_buf_caps = new int[cpusize]; - for (int i = 0; i < cpusize; i++) - { - cache.send_bufs[i] = cache.recv_bufs[i] = 0; - cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; - } - cache.max_reqs = 2 * cpusize; - cache.reqs = new MPI_Request[cache.max_reqs]; - 
cache.stats = new MPI_Status[cache.max_reqs]; - } - - MyList *dst = build_complete_gsl(PatcL); - for (int node = 0; node < cpusize; node++) - { - MyList *src_owned = build_owned_gsl(PatfL, node, 2, Symmetry); - build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]); - if (src_owned) src_owned->destroyList(); - } - if (dst) dst->destroyList(); - - cache.valid = true; - } - - transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache); -} - -// OutBdLow2Hi_cached: cache grid segment lists, reuse buffers via transfer_cached -void Parallel::OutBdLow2Hi_cached(MyList *PatcL, MyList *PatfL, - MyList *VarList1, MyList *VarList2, - int Symmetry, SyncCache &cache) -{ - if (!cache.valid) - { - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - cache.cpusize = cpusize; - - if (!cache.combined_src) - { - cache.combined_src = new MyList *[cpusize]; - cache.combined_dst = new MyList *[cpusize]; - cache.send_lengths = new int[cpusize]; - cache.recv_lengths = new int[cpusize]; - cache.send_bufs = new double *[cpusize]; - cache.recv_bufs = new double *[cpusize]; - cache.send_buf_caps = new int[cpusize]; - cache.recv_buf_caps = new int[cpusize]; - for (int i = 0; i < cpusize; i++) - { - cache.send_bufs[i] = cache.recv_bufs[i] = 0; - cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; - } - cache.max_reqs = 2 * cpusize; - cache.reqs = new MPI_Request[cache.max_reqs]; - cache.stats = new MPI_Status[cache.max_reqs]; - } - - MyList *dst = build_buffer_gsl(PatfL); - for (int node = 0; node < cpusize; node++) - { - MyList *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry); - build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]); - if (src_owned) src_owned->destroyList(); - } - if (dst) dst->destroyList(); - - cache.valid = true; - } - - transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache); -} - -// OutBdLow2Himix_cached: same as OutBdLow2Hi_cached but 
uses transfermix for unpacking -void Parallel::OutBdLow2Himix_cached(MyList *PatcL, MyList *PatfL, - MyList *VarList1, MyList *VarList2, - int Symmetry, SyncCache &cache) -{ - if (!cache.valid) - { - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - cache.cpusize = cpusize; - - if (!cache.combined_src) - { - cache.combined_src = new MyList *[cpusize]; - cache.combined_dst = new MyList *[cpusize]; - cache.send_lengths = new int[cpusize]; - cache.recv_lengths = new int[cpusize]; - cache.send_bufs = new double *[cpusize]; - cache.recv_bufs = new double *[cpusize]; - cache.send_buf_caps = new int[cpusize]; - cache.recv_buf_caps = new int[cpusize]; - for (int i = 0; i < cpusize; i++) - { - cache.send_bufs[i] = cache.recv_bufs[i] = 0; - cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; - } - cache.max_reqs = 2 * cpusize; - cache.reqs = new MPI_Request[cache.max_reqs]; - cache.stats = new MPI_Status[cache.max_reqs]; - } - - MyList *dst = build_buffer_gsl(PatfL); - for (int node = 0; node < cpusize; node++) - { - MyList *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry); - build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]); - if (src_owned) src_owned->destroyList(); - } - if (dst) dst->destroyList(); - - cache.valid = true; - } - - // Use transfermix instead of transfer for mix-mode interpolation - int myrank; - MPI_Comm_size(MPI_COMM_WORLD, &cache.cpusize); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - int cpusize = cache.cpusize; - - int req_no = 0; - for (int node = 0; node < cpusize; node++) - { - if (node == myrank) - { - int length = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - cache.recv_lengths[node] = length; - if (length > 0) - { - if (length > cache.recv_buf_caps[node]) - { - if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; - cache.recv_bufs[node] = new double[length]; - cache.recv_buf_caps[node] = length; - } - 
data_packermix(cache.recv_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - } - } - else - { - int slength = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - cache.send_lengths[node] = slength; - if (slength > 0) - { - if (slength > cache.send_buf_caps[node]) - { - if (cache.send_bufs[node]) delete[] cache.send_bufs[node]; - cache.send_bufs[node] = new double[slength]; - cache.send_buf_caps[node] = slength; - } - data_packermix(cache.send_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry); - MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++); - } - int rlength = data_packermix(0, cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry); - cache.recv_lengths[node] = rlength; - if (rlength > 0) - { - if (rlength > cache.recv_buf_caps[node]) - { - if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; - cache.recv_bufs[node] = new double[rlength]; - cache.recv_buf_caps[node] = rlength; - } - MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++); - } - } - } - - MPI_Waitall(req_no, cache.reqs, cache.stats); - - for (int node = 0; node < cpusize; node++) - if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0) - data_packermix(cache.recv_bufs[node], cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry); -} - -// collect all buffer grid segments or blocks for given patch -MyList *Parallel::build_buffer_gsl(Patch *Pat) -{ - MyList *cgsl, *gsc, *gsb; - - gsc = build_complete_gsl(Pat); // including ghost - - gsb = new MyList; - gsb->data = new Parallel::gridseg; - - for (int i = 0; i < dim; i++) - { - double DH = Pat->blb->data->getdX(i); - gsb->data->uub[i] = Pat->bbox[dim + i] - 
Pat->uui[i] * DH; - gsb->data->llb[i] = Pat->bbox[i] + Pat->lli[i] * DH; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - gsb->data->shape[i] = int((gsb->data->uub[i] - gsb->data->llb[i]) / DH + 0.4) + 1; -#else -#ifdef Cell - gsb->data->shape[i] = int((gsb->data->uub[i] - gsb->data->llb[i]) / DH + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - gsb->data->Bg = 0; - gsb->next = 0; - - cgsl = gsl_subtract(gsc, gsb); - - gsc->destroyList(); - gsb->destroyList(); - - // set illb and iuub - gsb = cgsl; - while (gsb) - { - for (int i = 0; i < dim; i++) - { - double DH = Pat->blb->data->getdX(i); - gsb->data->iuub[i] = Pat->bbox[dim + i] - Pat->uui[i] * DH; - gsb->data->illb[i] = Pat->bbox[i] + Pat->lli[i] * DH; - } - gsb = gsb->next; - } - - return cgsl; -} -MyList *Parallel::build_buffer_gsl(MyList *PatL) -{ - MyList *cgsl = 0, *gs; - while (PatL) - { - if (cgsl) - { - gs->next = build_buffer_gsl(PatL->data); - gs = gs->next; - if (gs) - while (gs->next) - gs = gs->next; - } - else - { - cgsl = build_buffer_gsl(PatL->data); - gs = cgsl; - if (gs) - while (gs->next) - gs = gs->next; - } - PatL = PatL->next; - } - - return cgsl; -} -void Parallel::Prolongint(Patch *Patc, Patch *Patf, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry) -{ - if (Patc->lev >= Patf->lev) - { - cout << "Parallel::Prolong: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int myrank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - int num_var = 0; - MyList *varl; - varl = VarList1; - while (varl) - { - num_var++; - varl = varl->next; - } - - MyList *BP = Patf->blb; - while (BP) - { - int Npts; - if (myrank == BP->data->rank) - Npts = BP->data->shape[0] * BP->data->shape[1] * BP->data->shape[2]; - MPI_Bcast(&Npts, 1, MPI_INT, BP->data->rank, MPI_COMM_WORLD); - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[Npts]; - if 
(myrank == BP->data->rank) - { - for (int i = 0; i < Npts; i++) - { - int ind[3]; - Parallel::getarrayindex(3, BP->data->shape, ind, i); - pox[0][i] = BP->data->X[0][ind[0]]; - pox[1][i] = BP->data->X[1][ind[1]]; - pox[2][i] = BP->data->X[2][ind[2]]; - } - } - for (int i = 0; i < 3; i++) - MPI_Bcast(pox[i], Npts, MPI_DOUBLE, BP->data->rank, MPI_COMM_WORLD); - double *res; - res = new double[num_var * Npts]; - Patc->Interp_Points(VarList1, Npts, pox, res, Symmetry); // because this operation is a global operation (for all processors) - // we have to isolate it out of myrank==BP->data->rank - if (myrank == BP->data->rank) - { - for (int i = 0; i < Npts; i++) - { - varl = VarList2; - int j = 0; - while (varl) - { - (BP->data->fgfs[varl->data->sgfn])[i] = res[j + i * num_var]; - j++; - varl = varl->next; - } - } - } - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] res; - BP = BP->next; - } -} -// -void Parallel::merge_gsl(MyList *&A, const double ratio) -{ - if (!A) - return; - - MyList *B, *C, *D = A; - bool flag = false; - while (D->next) - { - B = D->next; - while (B) - { - flag = merge_gs(D, B, C, ratio); - if (flag) - break; - B = B->next; - } - if (flag) - break; - D = D->next; - } - - if (flag) - { - // delete D and B from A - MyList *E = A; - while (E->next) - { - MyList *tp = E->next; - if (D == tp || B == tp) - { - E->next = (tp->next) ? tp->next : 0; - delete tp->data; - delete tp; - } - if (E->next) - E = E->next; - } - - if (D == A) - { - MyList *tp = A; - A = (A->next) ? 
A->next : 0; - delete tp->data; - delete tp; - } - // cat C to A - if (A) - A->catList(C); - else - A = C; - - merge_gsl(A, ratio); - } -} -// -bool Parallel::merge_gs(MyList *D, MyList *B, MyList *&C, const double ratio) -{ - if (!B || !D) - return false; - - C = 0; - double llb[dim], uub[dim], DH[dim]; - for (int i = 0; i < dim; i++) - { - double tdh; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH[i] = (D->data->uub[i] - D->data->llb[i]) / (D->data->shape[i] - 1); - tdh = (B->data->uub[i] - B->data->llb[i]) / (B->data->shape[i] - 1); -#else -#ifdef Cell - DH[i] = (D->data->uub[i] - D->data->llb[i]) / D->data->shape[i]; - tdh = (B->data->uub[i] - B->data->llb[i]) / B->data->shape[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - if (!feq(DH[i], tdh, DH[i] / 2)) - { - cout << "Parallel::merge_gs meets different grid segment " << DH[i] << " vs " << tdh << endl; - checkgsl(B, true); - checkgsl(D, true); - MPI_Abort(MPI_COMM_WORLD, 1); - } - llb[i] = Mymax(D->data->llb[i], B->data->llb[i]); - uub[i] = Mymin(D->data->uub[i], B->data->uub[i]); - // if(uub[i]-llb[i] < DH[i]/2) return false; //here this is valid for both vertex and cell - - // use 0 instead of DH[i]/2, we consider contact case, 2012 Aug 8 - if (uub[i] - llb[i] < 0) - return false; // here this is valid for both vertex and cell - } - - // vb: volume of B - // vd: volume of D - // vo: volume of overlap - // vt: volume of smallest common box (virtual merged box) - double vd = 1, vb = 1, vt = 1, vo = 1; - for (int i = 0; i < dim; i++) - { - vt = vt * (Mymax(D->data->uub[i], B->data->uub[i]) - Mymin(D->data->llb[i], B->data->llb[i])); - vo = vo * (uub[i] - llb[i]); - vd = vd * (D->data->uub[i] - D->data->llb[i]); - vb = vb * (B->data->uub[i] - B->data->llb[i]); - } - - // smller ratio, more possible to merge - if ((vd + vb - vo) / vt > ratio) - { - C = new MyList; - C->data = new gridseg; - for (int i = 0; i < dim; i++) - { - C->data->uub[i] = 
Mymax(D->data->uub[i], B->data->uub[i]); - C->data->llb[i] = Mymin(D->data->llb[i], B->data->llb[i]); -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4) + 1; -#else -#ifdef Cell - C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - if (D->data->Bg == B->data->Bg) - C->data->Bg = D->data->Bg; - else - C->data->Bg = 0; - - C->next = 0; - - return true; - } - else - { - return false; - } -} -// Add ghost region to tangent plane -// we assume the grids have the same resolution -void Parallel::add_ghost_touch(MyList *&A) -{ - if (!A || !(A->next)) - return; - - double DH[dim]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - for (int i = 0; i < dim; i++) - DH[i] = (A->data->uub[i] - A->data->llb[i]) / (A->data->shape[i] - 1) / 2; -#else -#ifdef Cell - for (int i = 0; i < dim; i++) - DH[i] = (A->data->uub[i] - A->data->llb[i]) / A->data->shape[i] / 2; -#else -#error Not define Vertex nor Cell -#endif -#endif - - MyList *C1, *C2, *A1 = A, *A2, *dc; - dc = C1 = clone_gsl(A, false); - while (C1) - { - C2 = C1->next; - A2 = A1->next; - while (C2) - { - for (int i = 0; i < dim; i++) - { - if (feq(C1->data->llb[i], C2->data->uub[i], DH[i])) - { - // direction i touch, other directions overlap - bool flag = true; - for (int j = 0; j < i; j++) - if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 && - (C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0) - flag = false; - for (int j = i + 1; j < dim; j++) - if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 && - (C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0) - flag = false; - - if (flag) - { - // only add one ghost region - if (feq(A1->data->llb[i], C1->data->llb[i], DH[i])) - { - 
A1->data->llb[i] -= ghost_width * 2 * DH[i]; - A1->data->shape[i] += ghost_width; - } - if (feq(A2->data->uub[i], C2->data->uub[i], DH[i])) - { - A2->data->uub[i] += ghost_width * 2 * DH[i]; - A2->data->shape[i] += ghost_width; - } - } - } - if (feq(C1->data->uub[i], C2->data->llb[i], DH[i])) - { - // direction i touch, other directions overlap - bool flag = true; - for (int j = 0; j < i; j++) - if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 && - (C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0) - flag = false; - for (int j = i + 1; j < dim; j++) - if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 && - (C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0) - flag = false; - - if (flag) - { - // only add one ghost region - if (feq(A1->data->uub[i], C1->data->uub[i], DH[i])) - { - A1->data->uub[i] += ghost_width * 2 * DH[i]; - A1->data->shape[i] += ghost_width; - } - if (feq(A2->data->llb[i], C2->data->llb[i], DH[i])) - { - A2->data->llb[i] -= ghost_width * 2 * DH[i]; - A2->data->shape[i] += ghost_width; - } - } - } - } - C2 = C2->next; - A2 = A2->next; - } - C1 = C1->next; - A1 = A1->next; - } - - if (dc) - dc->destroyList(); -} -// According to overlap to cut the gsl into recular pices -void Parallel::cut_gsl(MyList *&A) -{ - if (!A) - return; - - MyList *B, *C, *D = A; - bool flag = false; - while (D->next) - { - B = D->next; - while (B) - { - flag = cut_gs(D, B, C); - if (flag) - break; - B = B->next; - } - if (flag) - break; - D = D->next; - } - - if (flag) - { - // delete D and B from A - MyList *E = A; - while (E->next) - { - MyList *tp = E->next; - if (D == tp || B == tp) - { - E->next = (tp->next) ? tp->next : 0; - delete tp->data; - delete tp; - } - if (E->next) - E = E->next; - } - - if (D == A) - { - MyList *tp = A; - A = (A->next) ? 
A->next : 0; - delete tp->data; - delete tp; - } - // cat C to A - if (A) - A->catList(C); - else - A = C; - - cut_gsl(A); - } -} -// when D and B have overlap, cut them into C and return true -// otherwise return false and C=0 -bool Parallel::cut_gs(MyList *D, MyList *B, MyList *&C) -{ - C = 0; - double llb[dim], uub[dim], DH[dim]; - for (int i = 0; i < dim; i++) - { - double tdh; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH[i] = (D->data->uub[i] - D->data->llb[i]) / (D->data->shape[i] - 1); - tdh = (B->data->uub[i] - B->data->llb[i]) / (B->data->shape[i] - 1); -#else -#ifdef Cell - DH[i] = (D->data->uub[i] - D->data->llb[i]) / D->data->shape[i]; - tdh = (B->data->uub[i] - B->data->llb[i]) / B->data->shape[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - if (!feq(DH[i], tdh, DH[i] / 2)) - { - cout << "Parallel::cut_gs meets different grid segment " << DH[i] << " vs " << tdh << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - llb[i] = Mymax(D->data->llb[i], B->data->llb[i]); - uub[i] = Mymin(D->data->uub[i], B->data->uub[i]); - // for efficiency we ask the width of the patch at least 2(buffer+ghost+BD ghost) - if (uub[i] - llb[i] < DH[i] * 2 * (buffer_width + 2 * ghost_width)) - return false; // here this is valid for both vertex and cell - } - - // this part code results in 5 patches generally - - C = new MyList; - C->data = new gridseg; - for (int i = 0; i < dim; i++) - { - C->data->llb[i] = llb[i]; - C->data->uub[i] = uub[i]; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4) + 1; -#else -#ifdef Cell - C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - if (D->data->Bg == B->data->Bg) - C->data->Bg = D->data->Bg; - else - C->data->Bg = 0; - - C->next = gs_subtract_virtual(D, C); - - MyList *E = C; - - while (E->next) - E = 
E->next; - - E->next = gs_subtract_virtual(B, C); - - // this part code results in 3 patches generally - /* - C = clone_gsl(D,true); - C->next = gs_subtract_virtual(B,C); - */ - - return true; -} -// note here it is different to real cut, we need leave the cutting edge for both vertex center and cell center -MyList *Parallel::gs_subtract_virtual(MyList *A, MyList *B) -{ - if (!A) - return 0; - if (!B) - return clone_gsl(A, true); - - double cut_plane[2 * dim], DH[dim]; - - for (int i = 0; i < dim; i++) - { - double tdh; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH[i] = (A->data->uub[i] - A->data->llb[i]) / (A->data->shape[i] - 1); - tdh = (B->data->uub[i] - B->data->llb[i]) / (B->data->shape[i] - 1); -#else -#ifdef Cell - DH[i] = (A->data->uub[i] - A->data->llb[i]) / A->data->shape[i]; - tdh = (B->data->uub[i] - B->data->llb[i]) / B->data->shape[i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - if (!feq(DH[i], tdh, DH[i] / 2)) - { - cout << "Parallel::gs_subtract_virtual meets different grid segment " << DH[i] << " vs " << tdh << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - MyList *C = 0, *q; - for (int i = 0; i < dim; i++) - { - if (B->data->llb[i] > A->data->uub[i] || B->data->uub[i] < A->data->llb[i]) - return clone_gsl(A, true); - cut_plane[i] = A->data->llb[i]; - cut_plane[i + dim] = A->data->uub[i]; - } - - for (int i = 0; i < dim; i++) - { - cut_plane[i] = Mymax(A->data->llb[i], B->data->llb[i]); - if (cut_plane[i] > A->data->llb[i]) - { - q = clone_gsl(A, true); - // prolong the list from head - if (C) - q->next = C; - C = q; - for (int j = 0; j < dim; j++) - { - if (i == j) - { - C->data->llb[i] = A->data->llb[i]; - // **note here it is different to real cut, we need leave the cutting edge for both vertex center and cell center** - C->data->uub[i] = Mymax(C->data->llb[i], cut_plane[i]); - } - else - { - C->data->llb[j] = cut_plane[j]; - C->data->uub[j] = cut_plane[j + dim]; - } -#ifdef Vertex -#ifdef 
Cell -#error Both Cell and Vertex are defined -#endif - C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1; -#else -#ifdef Cell - C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - } - - cut_plane[i + dim] = Mymin(A->data->uub[i], B->data->uub[i]); - if (cut_plane[i + dim] < A->data->uub[i]) - { - q = clone_gsl(A, true); - if (C) - q->next = C; - C = q; - for (int j = 0; j < dim; j++) - { - if (i == j) - { - C->data->uub[i] = A->data->uub[i]; - // note here it is different to real cut, we need leave the cutting edge for both vertex center and cell center - C->data->llb[i] = Mymin(C->data->uub[i], cut_plane[i + dim]); - } - else - { - C->data->llb[j] = cut_plane[j]; - C->data->uub[j] = cut_plane[j + dim]; - } -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1; -#else -#ifdef Cell - C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - } - } - } - return C; -} -// note the data structure -// if CC is true -// 1 ----------- 1 ------ ^ -// 0 ------ | t -// 0 ----------- old ------ | -// -// old ----------- -// if CC is false -// 1 ----------- 1 ------ ^ -// 0 ----------- 0 ------ | t -// old ----------- old ------ | -void Parallel::fill_level_data(MyList *PatLd, MyList *PatLs, MyList *PatcL, - MyList *OldList, MyList *StateList, MyList *FutureList, - MyList *tmList, int Symmetry, bool BB, bool CC) -{ - if (PatLd->data->lev != PatLs->data->lev) - { - cout << "Parallel::fill_level_data: meet requst from lev#" << PatLs->data->lev << " to lev#" << PatLd->data->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - if (PatLd->data->lev <= PatcL->data->lev) - { - cout << "Parallel::fill_level_data: meet prolong requst from lev#" << PatcL->data->lev << " to lev#" << 
PatLd->data->lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - int cpusize; - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - - MyList *VarList = 0; - MyList *p; - p = StateList; - while (p) - { - if (VarList) - VarList->insert(p->data); - else - VarList = new MyList(p->data); - p = p->next; - } - p = FutureList; - while (p) - { - if (VarList) - VarList->insert(p->data); - else - VarList = new MyList(p->data); - p = p->next; - } - - MyList *dst; - MyList **src, **transfer_src, **transfer_dst; - src = new MyList *[cpusize]; - transfer_src = new MyList *[cpusize]; - transfer_dst = new MyList *[cpusize]; - - dst = build_complete_gsl(PatLd); // including ghost - // copy part - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl(PatLs, node, 0, Symmetry); // similar to Sync - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); - - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - MyList *dsts, *dstd; - dsts = build_complete_gsl_virtual(PatLs); - dstd = dst; - dst = gsl_subtract(dstd, dsts); - if (dstd) - dstd->destroyList(); - if (dsts) - dsts->destroyList(); - - if (dst) - { - // prolongation part - for (int node = 0; node < cpusize; node++) - { - src[node] = build_owned_gsl(PatcL, node, 4, Symmetry); // - buffer - ghost - BD ghost - build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node - } - - if (CC) - { - // for FutureList - // restrict first~~~> - { - Restrict(PatcL, PatLs, FutureList, FutureList, Symmetry); - Sync(PatcL, FutureList, Symmetry); - } - //<~~~prolong then - transfer(transfer_src, transfer_dst, FutureList, FutureList, Symmetry); - - // for StateList - // time 
interpolation part - if (BB) - prepare_inter_time_level(PatcL, FutureList, StateList, OldList, - tmList, 0); // use SynchList_pre as temporal storage space - else - prepare_inter_time_level(PatcL, FutureList, StateList, - tmList, 0); // use SynchList_pre as temporal storage space - // restrict first~~~> - { - Restrict(PatcL, PatLs, StateList, tmList, Symmetry); - Sync(PatcL, tmList, Symmetry); - } - //<~~~prolong then - transfer(transfer_src, transfer_dst, tmList, StateList, Symmetry); - } - else - { - // for both FutureList and StateList - // restrict first~~~> - { - Restrict(PatcL, PatLs, VarList, VarList, Symmetry); - Sync(PatcL, VarList, Symmetry); - } - //<~~~prolong then - transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); - } - - for (int node = 0; node < cpusize; node++) - { - if (src[node]) - src[node]->destroyList(); - if (transfer_src[node]) - transfer_src[node]->destroyList(); - if (transfer_dst[node]) - transfer_dst[node]->destroyList(); - } - - dst->destroyList(); - } - - delete[] src; - delete[] transfer_src; - delete[] transfer_dst; - - VarList->clearList(); -} -void Parallel::KillBlocks(MyList *PatchLIST) -{ - while (PatchLIST) - { - Patch *Pp = PatchLIST->data; - MyList *bg; - while (Pp->blb) - { - if (Pp->blb == Pp->ble) - break; - bg = (Pp->blb->next) ? 
Pp->blb->next : 0; - delete Pp->blb->data; - delete Pp->blb; - Pp->blb = bg; - } - if (Pp->ble) - { - delete Pp->ble->data; - delete Pp->ble; - } - Pp->blb = Pp->ble = 0; - PatchLIST = PatchLIST->next; - } -} -bool Parallel::PatList_Interp_Points(MyList *PatL, MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetry) -{ - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - double lld[dim], uud[dim]; - double **pox; - pox = new double *[dim]; - for (int j = 0; j < dim; j++) - pox[j] = new double[1]; - for (int i = 0; i < NN; i++) - { - MyList *PL = PatL; - while (PL) - { - bool flag = true; - for (int j = 0; j < dim; j++) - { - double h = PL->data->getdX(j); - lld[j] = PL->data->lli[j] * h; - uud[j] = PL->data->uui[j] * h; - if (XX[j][i] < PL->data->bbox[j] + lld[j] || XX[j][i] > PL->data->bbox[j + dim] - uud[j]) - { - flag = false; - break; - } - pox[j][0] = XX[j][i]; - } - if (flag) - { - PL->data->Interp_Points(VarList, 1, pox, Shellf + i * num_var, Symmetry); - break; - } - PL = PL->next; - } - if (!PL) - { - checkpatchlist(PatL, false); - return false; - } - } - for (int j = 0; j < dim; j++) - delete[] pox[j]; - delete[] pox; - - return true; -} -bool Parallel::PatList_Interp_Points(MyList *PatL, MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetry, MPI_Comm Comm_here) -{ - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - double lld[dim], uud[dim]; - double **pox; - pox = new double *[dim]; - for (int j = 0; j < dim; j++) - pox[j] = new double[1]; - for (int i = 0; i < NN; i++) - { - MyList *PL = PatL; - while (PL) - { - bool flag = true; - for (int j = 0; j < dim; j++) - { - double h = PL->data->getdX(j); - lld[j] = PL->data->lli[j] * h; - uud[j] = PL->data->uui[j] * h; - if (XX[j][i] < PL->data->bbox[j] + lld[j] || XX[j][i] > PL->data->bbox[j + dim] - uud[j]) - { - flag = false; - break; - } - pox[j][0] = 
XX[j][i]; - } - if (flag) - { - PL->data->Interp_Points(VarList, 1, pox, Shellf + i * num_var, Symmetry, Comm_here); - break; - } - PL = PL->next; - } - if (!PL) - { - checkpatchlist(PatL, false); - return false; - } - } - for (int j = 0; j < dim; j++) - delete[] pox[j]; - delete[] pox; - - return true; -} -void Parallel::aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape) -{ - const double aligntiny = 0.1; - double DHl, rr; - int NN; - for (int i = 0; i < dim; i++) - { - DHl = DH0[i] * pow(0.5, lev); - rr = bboxl[i] - bbox0[i]; - bboxl[i] = bbox0[i] + int(rr / DHl + 0.4) * DHl; - rr = bbox0[i + dim] - bboxl[i + dim]; - bboxl[i + dim] = bbox0[i + dim] - int(rr / DHl + 0.4) * DHl; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - NN = int((bboxl[i + dim] - bboxl[i]) / DHl + 0.4) + 1; -#else -#ifdef Cell - NN = int((bboxl[i + dim] - bboxl[i]) / DHl + 0.4); -#else -#error Not define Vertex nor Cell -#endif -#endif - if (NN != shape[i]) - { - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == 0) - { - cout << "Parallel::aligncheck want shape " << NN << " for lev#" << lev << ", but " << shape[i] << endl; - cout << "i = " << i << ", low = " << bboxl[i] << ", up = " << bboxl[i + dim] << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - } -} -bool Parallel::point_locat_gsl(double *pox, MyList *gsl) -{ - bool flag = false; - while (gsl) - { - for (int i = 0; i < dim; i++) - { - if (pox[i] > gsl->data->llb[i] && pox[i] < gsl->data->uub[i]) - flag = true; - else - { - flag = false; - break; - } - } - if (flag) - break; - gsl = gsl->next; - } - - return flag; -} -void Parallel::checkpatchlist(MyList *PatL, bool buflog) -{ - MyList *PL = PatL; - while (PL) - { - PL->data->checkPatch(buflog); - PL = PL->next; - } -} + +#include "Parallel.h" +#include "fmisc.h" +#include "prolongrestrict.h" +#include "misc.h" +#include "parameters.h" +#include + +int Parallel::partition1(int &nx, int split_size, int min_width, 
int cpusize, int shape) // special for 1 diemnsion +{ + nx = Mymax(1, shape / min_width); + nx = Mymin(cpusize, nx); + + return nx; +} +int Parallel::partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape) // special for 2 diemnsions +{ +#define SEARCH_SIZE 5 + int i, j, nx, ny; + int maxnx, maxny; + int mnx, mny; + int dn, hmin_width, cmin_width; + int cnx, cny; + double fx, fy; + int block_size; + int n; + + block_size = shape[0] * shape[1]; + n = Mymax(1, (block_size + split_size / 2) / split_size); + + maxnx = Mymax(1, shape[0] / min_width[0]); + maxnx = Mymin(cpusize, maxnx); + maxny = Mymax(1, shape[1] / min_width[1]); + maxny = Mymin(cpusize, maxny); + fx = (double)shape[0] / (shape[0] + shape[1]); + fy = (double)shape[1] / (shape[0] + shape[1]); + nx = mnx = Mymax(1, Mymin(maxnx, (int)(sqrt(double(n)) * fx / fy))); + ny = mny = Mymax(1, Mymin(maxny, (int)(sqrt(double(n)) * fy / fx))); + dn = abs(n - nx * ny); + hmin_width = Mymin(shape[0] / nx, shape[1] / ny); + for (cny = Mymax(1, mny - SEARCH_SIZE); cny <= (Mymin(mny + SEARCH_SIZE, maxny)); cny++) + for (cnx = Mymax(1, mnx - SEARCH_SIZE); cnx <= (Mymin(mnx + SEARCH_SIZE, maxnx)); cnx++) + { + cmin_width = Mymin(shape[0] / cnx, shape[1] / cny); + if (dn > abs(n - cnx * cny) || (dn == abs(n - cnx * cny) && cmin_width > hmin_width)) + { + dn = abs(n - cnx * cny); + nx = cnx; + ny = cny; + hmin_width = cmin_width; + } + } + + nxy[0] = nx; + nxy[1] = ny; + + return nx * ny; +#undef SEARCH_SIZE +} +int Parallel::partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape) // special for 3 diemnsions +#if 1 // algrithsm from Pretorius +{ +// cout< abs(n - cnx * cny * cnz) || (dn == abs(n - cnx * cny * cnz) && cmin_width > hmin_width)) + { + dn = abs(n - cnx * cny * cnz); + nx = cnx; + ny = cny; + nz = cnz; + hmin_width = cmin_width; + } + } + + nxyz[0] = nx; + nxyz[1] = ny; + nxyz[2] = nz; + + return nx * ny * nz; +#undef SEARCH_SIZE +} +#elif 0 // Zhihui's idea one on 
2013-09-25 +{ + int nx, ny, nz; + int hmin_width; + hmin_width = Mymin(min_width[0], min_width[1]); + hmin_width = Mymin(hmin_width, min_width[2]); + nx = shape[0] / hmin_width; + if (nx * hmin_width < shape[0]) + nx++; + ny = shape[1] / hmin_width; + if (ny * hmin_width < shape[1]) + ny++; + nz = shape[2] / hmin_width; + if (nz * hmin_width < shape[2]) + nz++; + while (nx * ny * nz > cpusize) + { + hmin_width++; + nx = shape[0] / hmin_width; + if (nx * hmin_width < shape[0]) + nx++; + ny = shape[1] / hmin_width; + if (ny * hmin_width < shape[1]) + ny++; + nz = shape[2] / hmin_width; + if (nz * hmin_width < shape[2]) + nz++; + } + + nxyz[0] = nx; + nxyz[1] = ny; + nxyz[2] = nz; + + return nx * ny * nz; +} +#elif 0 // Zhihui's idea two on 2013-09-25 +{ + int nx, ny, nz; + const int hmin_width = 8; // for example we use 8 + nx = shape[0] / hmin_width; + if (nx * hmin_width < shape[0]) + nx++; + ny = shape[1] / hmin_width; + if (ny * hmin_width < shape[1]) + ny++; + nz = shape[2] / hmin_width; + if (nz * hmin_width < shape[2]) + nz++; + + nxyz[0] = nx; + nxyz[1] = ny; + nxyz[2] = nz; + + return nx * ny * nz; +} +#endif +// distribute the data to cprocessors +#if (PSTR == 0) +MyList *Parallel::distribute(MyList *PatchLIST, int cpusize, int ingfsi, int fngfsi, + bool periodic, int nodes) +{ +#ifdef USE_GPU_DIVIDE + double cpu_part, gpu_part; + map::iterator iter; + iter = parameters::dou_par.find("cpu part"); + if (iter != parameters::dou_par.end()) + { + cpu_part = iter->second; + } + else + { + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good() && myrank == 0) + 
{ + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "cpu part") + cpu_part = atof(sval.c_str()); + } + } + inf.close(); + + parameters::dou_par.insert(map::value_type("cpu part", cpu_part)); + } + iter = parameters::dou_par.find("gpu part"); + if (iter != parameters::dou_par.end()) + { + gpu_part = iter->second; + } + else + { + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "gpu part") + gpu_part = atof(sval.c_str()); + } + } + inf.close(); + + parameters::dou_par.insert(map::value_type("gpu part", gpu_part)); + } + + if (nodes == 0) + nodes = cpusize / 2; +#else + if (nodes == 0) + nodes = cpusize; +#endif + + if (dim != 3) + { + cout << "distrivute: now we only support 
3-dimension" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + MyList *BlL = 0; + + int split_size, min_size, block_size = 0; + + int min_width = 2 * Mymax(ghost_width, buffer_width); + int nxyz[dim], mmin_width[dim], min_shape[dim]; + + MyList *PLi = PatchLIST; + for (int i = 0; i < dim; i++) + min_shape[i] = PLi->data->shape[i]; + int lev = PLi->data->lev; + PLi = PLi->next; + while (PLi) + { + Patch *PP = PLi->data; + for (int i = 0; i < dim; i++) + min_shape[i] = Mymin(min_shape[i], PP->shape[i]); + if (lev != PLi->data->lev) + cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl; + PLi = PLi->next; + } + + for (int i = 0; i < dim; i++) + mmin_width[i] = Mymin(min_width, min_shape[i]); + + min_size = mmin_width[0]; + for (int i = 1; i < dim; i++) + min_size = min_size * mmin_width[i]; + + PLi = PatchLIST; + while (PLi) + { + Patch *PP = PLi->data; + // PP->checkPatch(true); + int bs = PP->shape[0]; + for (int i = 1; i < dim; i++) + bs = bs * PP->shape[i]; + block_size = block_size + bs; + PLi = PLi->next; + } + split_size = Mymax(min_size, block_size / nodes); + split_size = Mymax(1, split_size); + + int n_rank = 0; + PLi = PatchLIST; + int reacpu = 0; + while (PLi) + { + Patch *PP = PLi->data; + + reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape); + + Block *ng0, *ng; + int shape_here[dim], ibbox_here[2 * dim]; + double bbox_here[2 * dim], dd; + + // ibbox : 0,...N-1 + for (int i = 0; i < nxyz[0]; i++) + for (int j = 0; j < nxyz[1]; j++) + for (int k = 0; k < nxyz[2]; k++) + { + ibbox_here[0] = (PP->shape[0] * i) / nxyz[0]; + ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1; + ibbox_here[1] = (PP->shape[1] * j) / nxyz[1]; + ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1; + ibbox_here[2] = (PP->shape[2] * k) / nxyz[2]; + ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1; + + if (periodic) + { + ibbox_here[0] = ibbox_here[0] - ghost_width; + ibbox_here[3] = 
ibbox_here[3] + ghost_width; + ibbox_here[1] = ibbox_here[1] - ghost_width; + ibbox_here[4] = ibbox_here[4] + ghost_width; + ibbox_here[2] = ibbox_here[2] - ghost_width; + ibbox_here[5] = ibbox_here[5] + ghost_width; + } + else + { + ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width); + ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width); + ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width); + ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width); + ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width); + ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width); + } + + shape_here[0] = ibbox_here[3] - ibbox_here[0] + 1; + shape_here[1] = ibbox_here[4] - ibbox_here[1] + 1; + shape_here[2] = ibbox_here[5] - ibbox_here[2] + 1; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + // 0--4, 5--10 + dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1); + bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd; + bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd; + + dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1); + bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd; + bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd; + + dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1); + bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd; + bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd; +#else +#ifdef Cell + // 0--5, 5--10 + dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0]; + bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd; + bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd; + + dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1]; + bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd; + bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd; + + dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2]; + bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd; + bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd; +#else +#error Not define Vertex nor Cell +#endif +#endif + +#ifdef USE_GPU_DIVIDE + { + const int pices = 2; + 
double picef[pices]; + picef[0] = cpu_part; + picef[1] = gpu_part; + int shape_res[dim * pices]; + double bbox_res[2 * dim * pices]; + misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width); + ng = ng0 = new Block(dim, shape_res, bbox_res, n_rank++, ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks + + // if(n_rank==cpusize) {n_rank=0; cerr<<"place one!!"<checkBlock(); + if (BlL) + BlL->insert(ng); + else + BlL = new MyList(ng); // delete through KillBlocks + + for (int i = 1; i < pices; i++) + { + ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, n_rank++, ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks + // if(n_rank==cpusize) {n_rank=0; cerr<<"place two!! "<checkBlock(); + BlL->insert(ng); + } + } +#else + ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks + // ng->checkBlock(); + if (BlL) + BlL->insert(ng); + else + BlL = new MyList(ng); // delete through KillBlocks +#endif + if (n_rank == cpusize) + n_rank = 0; + + // set PP->blb + if (i == 0 && j == 0 && k == 0) + { + MyList *Bp = BlL; + while (Bp->data != ng0) + Bp = Bp->next; // ng0 is the first of the pices list + PP->blb = Bp; + } + } + // set PP->ble + { + MyList *Bp = BlL; + while (Bp->data != ng) + Bp = Bp->next; // ng is the last of the pices list + PP->ble = Bp; + } + PLi = PLi->next; + } + if (reacpu < nodes * 2 / 3) + { + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) + cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." 
<< endl; + } + + return BlL; +} +MyList *Parallel::distribute_hard(MyList *PatchLIST, int cpusize, int ingfsi, int fngfsi, + bool periodic, int nodes) +{ +#ifdef USE_GPU_DIVIDE + double cpu_part, gpu_part; + map::iterator iter; + iter = parameters::dou_par.find("cpu part"); + if (iter != parameters::dou_par.end()) + { + cpu_part = iter->second; + } + else + { + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "cpu part") + cpu_part = atof(sval.c_str()); + } + } + inf.close(); + + parameters::dou_par.insert(map::value_type("cpu part", cpu_part)); + } + iter = parameters::dou_par.find("gpu part"); + if (iter != parameters::dou_par.end()) + { + gpu_part = iter->second; + } + else + { + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + 
ifstream inf(pname, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "gpu part") + gpu_part = atof(sval.c_str()); + } + } + inf.close(); + + parameters::dou_par.insert(map::value_type("gpu part", gpu_part)); + } + + if (nodes == 0) + nodes = cpusize / 2; +#else + if (nodes == 0) + nodes = cpusize; +#endif + + if (dim != 3) + { + cout << "distrivute: now we only support 3-dimension" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + MyList *BlL = 0; + int split_size, min_size, block_size = 0; + + int min_width = 2 * Mymax(ghost_width, buffer_width); + int nxyz[dim], mmin_width[dim], min_shape[dim]; + + MyList *PLi = PatchLIST; + for (int i = 0; i < dim; i++) + min_shape[i] = PLi->data->shape[i]; + int lev = PLi->data->lev; + PLi = PLi->next; + while (PLi) + { + Patch *PP = PLi->data; + for (int i = 0; i < dim; i++) + min_shape[i] = Mymin(min_shape[i], PP->shape[i]); + if (lev != PLi->data->lev) + cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl; + PLi = PLi->next; + } + + for (int i = 0; i < dim; i++) + mmin_width[i] = Mymin(min_width, min_shape[i]); + + min_size = mmin_width[0]; + for (int i = 1; i < dim; i++) + min_size = min_size * mmin_width[i]; + + PLi = PatchLIST; + while (PLi) + { + Patch *PP = PLi->data; + // PP->checkPatch(true); + int bs = PP->shape[0]; + for (int i = 1; i < dim; i++) + bs = bs * PP->shape[i]; + block_size = block_size + bs; + PLi = PLi->next; + } + split_size = Mymax(min_size, block_size / nodes); + split_size = 
Mymax(1, split_size); + + int n_rank = 0; + PLi = PatchLIST; + int reacpu = 0; + int current_block_id = 0; + while (PLi) { + Block *ng0, *ng; + bool first_block_in_patch = true; + Patch *PP = PLi->data; + reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape); + + for (int i = 0; i < nxyz[0]; i++) + for (int j = 0; j < nxyz[1]; j++) + for (int k = 0; k < nxyz[2]; k++) + { + // --- 1. 定义局部变量 --- + int ibbox_here[6], shape_here[3]; + double bbox_here[6], dd; + Block *current_ng_start = nullptr; // 本次循环产生的第一个(或唯一一个)块 + + // --- 2. 核心逻辑分支 --- + if (current_block_id == 27 || current_block_id == 28 || + current_block_id == 35 || current_block_id == 36) + { + // A. 计算原始索引 (不带 Ghost) + int ib0 = (PP->shape[0] * i) / nxyz[0]; + int ib3 = (PP->shape[0] * (i + 1)) / nxyz[0] - 1; + int jb1 = (PP->shape[1] * j) / nxyz[1]; + int jb4 = (PP->shape[1] * (j + 1)) / nxyz[1] - 1; + int kb2 = (PP->shape[2] * k) / nxyz[2]; + int kb5 = (PP->shape[2] * (k + 1)) / nxyz[2] - 1; + + int r_l, r_r; + if(current_block_id == 27) { r_l = 26; r_r = 27; } + else if(current_block_id == 28) { r_l = 28; r_r = 29; } + else if(current_block_id == 35) { r_l = 34; r_r = 35; } + else { r_l = 36; r_r = 37; } + Block * split_first_block = nullptr; + Block * split_last_block = nullptr; + // 拆分逻辑:该函数应更新类成员变量 split_first_block 和 split_last_block + splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5, + PP, r_l, r_r, ingfsi, fngfsi, periodic,split_first_block,split_last_block); + + current_ng_start = split_first_block; + ng = split_last_block; + } + else + { + // B. 
普通块逻辑 (含 Ghost 扩张) + ibbox_here[0] = (PP->shape[0] * i) / nxyz[0]; + ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1; + ibbox_here[1] = (PP->shape[1] * j) / nxyz[1]; + ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1; + ibbox_here[2] = (PP->shape[2] * k) / nxyz[2]; + ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1; + + if (periodic) { + for(int d=0; d<3; d++) { + ibbox_here[d] -= ghost_width; + ibbox_here[d+3] += ghost_width; + } + } else { + ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width); + ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width); + ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width); + ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width); + ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width); + ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width); + } + + for(int d=0; d<3; d++) shape_here[d] = ibbox_here[d+3] - ibbox_here[d] + 1; + + // 物理坐标计算 (根据你的宏定义 Cell/Vertex) +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + // 0--4, 5--10 + dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1); + bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd; + bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd; + + dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1); + bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd; + bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd; + + dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1); + bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd; + bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd; +#else +#ifdef Cell + // 0--5, 5--10 + dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0]; + bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd; + bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd; + + dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1]; + bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd; + bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd; + + dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2]; + bbox_here[2] = PP->bbox[2] + 
(ibbox_here[2]) * dd; + bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd; +#else +#error Not define Vertex nor Cell +#endif +#endif + ng = createMappedBlock(BlL, dim, shape_here, bbox_here, current_block_id, ingfsi, fngfsi, PP->lev); + current_ng_start = ng; + } + + // --- 3. 统一处理 Patch 起始 Block 指针 --- + if (first_block_in_patch) { + ng0 = current_ng_start; + + // 立即设置 PP->blb,避免后续循环覆盖 ng0 + MyList *Bp_start = BlL; + while (Bp_start && Bp_start->data != ng0) Bp_start = Bp_start->next; + PP->blb = Bp_start; + + first_block_in_patch = false; + } + + current_block_id++; + } + + // --- 4. 设置 Patch 结束 Block 指针 --- + MyList *Bp_end = BlL; + while (Bp_end && Bp_end->data != ng) Bp_end = Bp_end->next; + PP->ble = Bp_end; + + PLi = PLi->next; + first_block_in_patch = true; + } + if (reacpu < nodes * 2 / 3) + { + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) + cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." << endl; + } + + return BlL; +} + + /** + * @brief 将当前 Block 几何二等分并存入列表 + * @param axis 拆分轴:0-x, 1-y, 2-z (建议选最长轴) + */ +Block* Parallel::splitHotspotBlock(MyList* &BlL, int _dim, + int ib0_orig, int ib3_orig, + int jb1_orig, int jb4_orig, + int kb2_orig, int kb5_orig, + Patch* PP, int r_left, int r_right, + int ingfsi, int fngfsi, bool periodic, + Block* &split_first_block, Block* &split_last_block) +{ + // 1. 索引二分 (基于无 ghost 的原始索引) + int mid = (ib0_orig + ib3_orig) / 2; + + // 左块原始索引: [ib0, mid], 右块原始索引: [mid+1, ib3] + int indices_L[6] = {ib0_orig, jb1_orig, kb2_orig, mid, jb4_orig, kb5_orig}; + int indices_R[6] = {mid + 1, jb1_orig, kb2_orig, ib3_orig, jb4_orig, kb5_orig}; + + // 2. 
内部处理逻辑 (复刻原 distribute 逻辑) + auto createSubBlock = [&](int* ib_raw, int target_rank) { + int ib_final[6]; + int sh_here[3]; + double bb_here[6], dd; + + // --- 逻辑 A: Ghost 扩张 --- + if (periodic) { + ib_final[0] = ib_raw[0] - ghost_width; + ib_final[3] = ib_raw[3] + ghost_width; + ib_final[1] = ib_raw[1] - ghost_width; + ib_final[4] = ib_raw[4] + ghost_width; + ib_final[2] = ib_raw[2] - ghost_width; + ib_final[5] = ib_raw[5] + ghost_width; + } else { + ib_final[0] = Mymax(0, ib_raw[0] - ghost_width); + ib_final[3] = Mymin(PP->shape[0] - 1, ib_raw[3] + ghost_width); + ib_final[1] = Mymax(0, ib_raw[1] - ghost_width); + ib_final[4] = Mymin(PP->shape[1] - 1, ib_raw[4] + ghost_width); + ib_final[2] = Mymax(0, ib_raw[2] - ghost_width); + ib_final[5] = Mymin(PP->shape[2] - 1, ib_raw[5] + ghost_width); + } + + sh_here[0] = ib_final[3] - ib_final[0] + 1; + sh_here[1] = ib_final[4] - ib_final[1] + 1; + sh_here[2] = ib_final[5] - ib_final[2] + 1; + + // --- 逻辑 B: 物理坐标计算 (严格匹配 Cell 模式) --- + // X 方向 + dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0]; + bb_here[0] = PP->bbox[0] + ib_final[0] * dd; + bb_here[3] = PP->bbox[0] + (ib_final[3] + 1) * dd; + + // Y 方向 + dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1]; + bb_here[1] = PP->bbox[1] + ib_final[1] * dd; + bb_here[4] = PP->bbox[1] + (ib_final[4] + 1) * dd; + + // Z 方向 + dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2]; + bb_here[2] = PP->bbox[2] + ib_final[2] * dd; + bb_here[5] = PP->bbox[2] + (ib_final[5] + 1) * dd; + + Block* Bg = new Block(dim, sh_here, bb_here, target_rank, ingfsi, fngfsi, PP->lev); + if (BlL) BlL->insert(Bg); + else BlL = new MyList(Bg); + + + return Bg; + }; + + // 执行创建 + split_first_block = createSubBlock(indices_L, r_left); + split_last_block = createSubBlock(indices_R, r_right); +} + + +/** + * @brief 创建映射后的 Block + */ + Block* Parallel::createMappedBlock(MyList* &BlL, int _dim, int* shape, double* bbox, + int block_id, int ingfsi, int fngfsi, int lev) + { + // 映射表逻辑 + int target_rank = block_id; + if 
(block_id == 26) target_rank = 25; + else if (block_id == 29) target_rank = 30; + else if (block_id == 34) target_rank = 33; + else if (block_id == 37) target_rank = 38; + + Block* ng = new Block(dim, shape, bbox, target_rank, ingfsi, fngfsi, lev); + + if (BlL) BlL->insert(ng); + else BlL = new MyList(ng); + + return ng; + } + + + + +#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) +MyList *Parallel::distribute(MyList *PatchLIST, int cpusize, int ingfsi, int fngfsi, + bool periodic, int start_rank, int end_rank, int nodes) +{ +#ifdef USE_GPU_DIVIDE + double cpu_part, gpu_part; + map::iterator iter; + iter = parameters::dou_par.find("cpu part"); + if (iter != parameters::dou_par.end()) + { + cpu_part = iter->second; + } + else + { + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "cpu part") + cpu_part = atof(sval.c_str()); + } + } + inf.close(); + + parameters::dou_par.insert(map::value_type("cpu part", cpu_part)); + } + iter = parameters::dou_par.find("gpu part"); + if (iter != parameters::dou_par.end()) + { + gpu_part = iter->second; + } + else + { + int myrank; + 
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "gpu part") + gpu_part = atof(sval.c_str()); + } + } + inf.close(); + + parameters::dou_par.insert(map::value_type("gpu part", gpu_part)); + } + + if (nodes == 0) + nodes = cpusize / 2; +#else + if (nodes == 0) + nodes = cpusize; +#endif + + if (dim != 3) + { + cout << "distrivute: now we only support 3-dimension" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + MyList *BlL = 0; + + int split_size, min_size, block_size = 0; + + int min_width = 2 * Mymax(ghost_width, buffer_width); + int nxyz[dim], mmin_width[dim], min_shape[dim]; + + MyList *PLi = PatchLIST; + for (int i = 0; i < dim; i++) + min_shape[i] = PLi->data->shape[i]; + int lev = PLi->data->lev; + PLi = PLi->next; + while (PLi) + { + Patch *PP = PLi->data; + for (int i = 0; i < dim; i++) + min_shape[i] = Mymin(min_shape[i], PP->shape[i]); + if (lev != PLi->data->lev) + cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl; + PLi = PLi->next; + } + + for (int i = 0; i < dim; i++) + mmin_width[i] = Mymin(min_width, min_shape[i]); + 
+ min_size = mmin_width[0]; + for (int i = 1; i < dim; i++) + min_size = min_size * mmin_width[i]; + + PLi = PatchLIST; + while (PLi) + { + Patch *PP = PLi->data; + // PP->checkPatch(true); + int bs = PP->shape[0]; + for (int i = 1; i < dim; i++) + bs = bs * PP->shape[i]; + block_size = block_size + bs; + PLi = PLi->next; + } + split_size = Mymax(min_size, block_size / cpusize); + split_size = Mymax(1, split_size); + + int n_rank = start_rank; + PLi = PatchLIST; + int reacpu = 0; + while (PLi) + { + Patch *PP = PLi->data; + + reacpu += partition3(nxyz, split_size, mmin_width, cpusize, PP->shape); + + Block *ng, *ng0; + int shape_here[dim], ibbox_here[2 * dim]; + double bbox_here[2 * dim], dd; + + // ibbox : 0,...N-1 + for (int i = 0; i < nxyz[0]; i++) + for (int j = 0; j < nxyz[1]; j++) + for (int k = 0; k < nxyz[2]; k++) + { + ibbox_here[0] = (PP->shape[0] * i) / nxyz[0]; + ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1; + ibbox_here[1] = (PP->shape[1] * j) / nxyz[1]; + ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1; + ibbox_here[2] = (PP->shape[2] * k) / nxyz[2]; + ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1; + + if (periodic) + { + ibbox_here[0] = ibbox_here[0] - ghost_width; + ibbox_here[3] = ibbox_here[3] + ghost_width; + ibbox_here[1] = ibbox_here[1] - ghost_width; + ibbox_here[4] = ibbox_here[4] + ghost_width; + ibbox_here[2] = ibbox_here[2] - ghost_width; + ibbox_here[5] = ibbox_here[5] + ghost_width; + } + else + { + ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width); + ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width); + ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width); + ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width); + ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width); + ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width); + } + + shape_here[0] = ibbox_here[3] - ibbox_here[0] + 1; + shape_here[1] = ibbox_here[4] - ibbox_here[1] + 1; + shape_here[2] = 
ibbox_here[5] - ibbox_here[2] + 1; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + // 0--4, 5--10 + dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1); + bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd; + bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd; + + dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1); + bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd; + bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd; + + dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1); + bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd; + bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd; +#else +#ifdef Cell + // 0--5, 5--10 + dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0]; + bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd; + bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd; + + dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1]; + bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd; + bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd; + + dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2]; + bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd; + bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd; +#else +#error Not define Vertex nor Cell +#endif +#endif + +#ifdef USE_GPU_DIVIDE + { + const int pices = 2; + double picef[pices]; + picef[0] = cpu_part; + picef[1] = gpu_part; + int shape_res[dim * pices]; + double bbox_res[2 * dim * pices]; + misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width); + ng = ng0 = new Block(dim, shape_res, bbox_res, n_rank++, ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks + // ng->checkBlock(); + if (BlL) + BlL->insert(ng); + else + BlL = new MyList(ng); // delete through KillBlocks + + for (int i = 1; i < pices; i++) + { + ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, n_rank++, ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks + // ng->checkBlock(); + BlL->insert(ng); + } + } +#else + ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, 
ingfsi, fngfsi, PP->lev); // delete through KillBlocks + // ng->checkBlock(); + if (BlL) + BlL->insert(ng); + else + BlL = new MyList(ng); // delete through KillBlocks +#endif + + if (n_rank == end_rank + 1) + n_rank = start_rank; + + // set PP->blb + if (i == 0 && j == 0 && k == 0) + { + MyList *Bp = BlL; + while (Bp->data != ng0) + Bp = Bp->next; // ng0 is the first of the pices list + PP->blb = Bp; + } + } + // set PP->ble + { + MyList *Bp = BlL; + while (Bp->data != ng) + Bp = Bp->next; // ng is the last of the pices list + PP->ble = Bp; + } + PLi = PLi->next; + } + if (reacpu < nodes * 2 / 3) + { + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == start_rank) + cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." << endl; + } + + return BlL; +} +#endif +void Parallel::setfunction(MyList *BlL, var *vn, double func(double x, double y, double z)) +{ + while (BlL) + { + if (BlL->data->X[0]) + { + int nn = BlL->data->shape[0] * BlL->data->shape[1] * BlL->data->shape[2]; + double *p = BlL->data->fgfs[vn->sgfn]; + for (int i = 0; i < nn; i++) + { + int ind[3]; + getarrayindex(3, BlL->data->shape, ind, i); + p[i] = func(BlL->data->X[0][ind[0]], BlL->data->X[1][ind[1]], BlL->data->X[2][ind[2]]); + } + } + BlL = BlL->next; + } +} +// set function only for cpu rank +void Parallel::setfunction(int rank, MyList *BlL, var *vn, double func(double x, double y, double z)) +{ + while (BlL) + { + if (BlL->data->X[0] && BlL->data->rank == rank) + { + int nn = BlL->data->shape[0] * BlL->data->shape[1] * BlL->data->shape[2]; + double *p = BlL->data->fgfs[vn->sgfn]; + for (int i = 0; i < nn; i++) + { + int ind[3]; + getarrayindex(3, BlL->data->shape, ind, i); + p[i] = func(BlL->data->X[0][ind[0]], BlL->data->X[1][ind[1]], BlL->data->X[2][ind[2]]); + } + } + BlL = BlL->next; + } +} +void Parallel::getarrayindex(int 
DIM, int *shape, int *index, int n) +{ + // we assume index has already memory space + int *mu; + mu = new int[DIM]; + mu[0] = 1; + for (int i = 1; i < DIM; i++) + mu[i] = mu[i - 1] * shape[i - 1]; + for (int i = DIM - 1; i >= 0; i--) + { + index[i] = n / mu[i]; + n = n - index[i] * mu[i]; + } + + delete[] mu; +} +int Parallel::getarraylocation(int DIM, int *shape, int *index) +{ + int n, mu; + mu = shape[0]; + n = index[0]; + for (int i = 1; i < DIM; i++) + { + n = n + index[i] * mu; + mu = mu * shape[i]; + } + + return n; +} +void Parallel::copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin, + int *shape, double *datain, double *llb, double *uub) +{ + // for 3 dimensional case, based on simple test, I found this is half slower than f90 code + int *illi, *iuui; + int *illo, *iuuo; + int *indi, *indo; + illi = new int[DIM]; + iuui = new int[DIM]; + illo = new int[DIM]; + iuuo = new int[DIM]; + indi = new int[DIM]; + indo = new int[DIM]; + + int ial = 1; + for (int i = 0; i < DIM; i++) + { + double ho, hi; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + ho = (uubout[i] - llbout[i]) / (Dshape[i] - 1); + hi = (uubin[i] - llbin[i]) / (shape[i] - 1); +#else +#ifdef Cell + ho = (uubout[i] - llbout[i]) / Dshape[i]; + hi = (uubin[i] - llbin[i]) / shape[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + illo[i] = int((llb[i] - llbout[i]) / ho); + iuuo[i] = Dshape[i] - 1 - int((uubout[i] - uub[i]) / ho); + illi[i] = int((llb[i] - llbin[i]) / hi); + iuui[i] = shape[i] - 1 - int((uubin[i] - uub[i]) / hi); + + if (illo[i] > iuuo[i] || illi[i] > iuui[i] || illo[i] < 0 || illi[i] < 0 || + iuui[i] >= shape[i] || iuuo[i] >= Dshape[i]) + { + cout << "Parallel copy: in direction " << i << ":" << endl; + cout << "llb = " << llb[i] << ", uub = " << uub[i] << endl; + cout << " in data : il = " << illi[i] << ", iu = " << iuui[i] << endl; + cout << "bbox = (" << llbin[i] << "," << uubin[i] << ")" 
<< endl; + cout << "shape = " << shape[i] << endl; + cout << "out data : il = " << illo[i] << ", iu = " << iuuo[i] << endl; + cout << "bbox = (" << llbout[i] << "," << uubout[i] << ")" << endl; + cout << "shape = " << Dshape[i] << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + int ihi = iuui[i] - illi[i] + 1, iho = iuuo[i] - illo[i] + 1; + if (!(feq(ho, hi, ho / 2)) || ihi != iho) + { + cout << "Parallel copy: in direction " << i << ":" << endl; + cout << "Parallel copy: not the same grid structure." << endl; + cout << "hi = " << hi << ", bbox = (" << llbin[i] << "," << uubin[i] << "), shape = " << shape[i] << endl; + cout << "ho = " << ho << ", bbox = (" << llbout[i] << "," << uubout[i] << "), shape = " << Dshape[i] << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + ial = ial * ihi; + } + + for (int i = 0; i < DIM; i++) + { + indi[i] = illi[i]; + indo[i] = illo[i]; + } + /* + //check start index + for(int i=0;i NNi) + { + cout << "Parallel copy: ni = " << ni << " is out of array range (0," << NNi << ")." 
<< endl; + cout << "shape = ("; + for (int j = 0; j < DIM; j++) + { + cout << shape[j]; + if (j < DIM - 1) + cout << ","; + else + cout << ")" << endl; + } + cout << "ind = ("; + for (int j = 0; j < DIM; j++) + { + cout << indi[j]; + if (j < DIM - 1) + cout << ","; + else + cout << ")" << endl; + } + MPI_Abort(MPI_COMM_WORLD, 1); + } + DD[no] = datain[ni]; + + indi[0]++; + for (int j = 1; j < DIM; j++) + { + if (indi[j - 1] == iuui[j - 1] + 1) + { + indi[j - 1] = illi[j - 1]; + indi[j]++; + } // carry 1 to next digital + else + break; + } + indo[0]++; + for (int j = 1; j < DIM; j++) + { + if (indo[j - 1] == iuuo[j - 1] + 1) + { + indo[j - 1] = illo[j - 1]; + indo[j]++; + } + else + break; + } + } + /* + //check final index + for(int i=0;i *BlL, MyList *DumpList, char *tag, double time, double dT) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // round at 4 and 5 + int ncount = int(time / dT + 0.5); + + MyList *Bp; + while (DumpList) + { + Bp = BlL; + int Bi = 0; + while (Bp) + { + Block *BP = Bp->data; + var *VP = DumpList->data; + if (BP->rank == myrank) + { + + string out_dir; + map::iterator iter; + iter = parameters::str_par.find("output dir"); + if (iter != parameters::str_par.end()) + { + out_dir = iter->second; + } + else + { + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good()) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname 
<< " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "output dir") + out_dir = sval; + } + } + inf.close(); + + parameters::str_par.insert(map::value_type("output dir", out_dir)); + } + + char filename[100]; + if (tag) + sprintf(filename, "%s/%s_Lev%02d-%02d_%02d_%s_%05d.bin", out_dir.c_str(), tag, BP->lev, Bi, myrank, VP->name, ncount); + else + sprintf(filename, "%s/Lev%02d-%02d_%02d_%s_%05d.bin", out_dir.c_str(), BP->lev, Bi, myrank, VP->name, ncount); + writefile(time, BP->shape[0], BP->shape[1], BP->shape[2], BP->bbox[0], BP->bbox[3], BP->bbox[1], BP->bbox[4], + BP->bbox[2], BP->bbox[5], filename, BP->fgfs[VP->sgfn]); + cout << "end of dump " << VP->name << " at time " << time << ", on node " << myrank << endl; + } + Bp = Bp->next; + Bi++; + } + DumpList = DumpList->next; + } +} +// Now we dump the data including buffer points +void Parallel::Dump_Data(Patch *PP, MyList *DumpList, char *tag, double time, double dT, int grd) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // round at 4 and 5 + int ncount = int(time / dT + 0.5); + + MPI_Status sta; + int DIM = 3; + double llb[3], uub[3]; + double DX, DY, DZ; + + double *databuffer = 0; + if (myrank == 0) + { + databuffer = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1] * PP->shape[2]); + if (!databuffer) + { + cout << "Parallel::Dump_Data: out of memory when dumping data." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + while (DumpList) + { + var *VP = DumpList->data; + + MyList *Bp = PP->blb; + while (Bp) + { + Block *BP = Bp->data; + if (BP->rank == 0 && myrank == 0) + { + DX = BP->getdX(0); + DY = BP->getdX(1); + DZ = BP->getdX(2); + llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; + llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; + llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? 
BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; + uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; + uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; + uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; + f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub); + } + else + { + int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); + if (myrank == 0) + { + double *bufferhere = (double *)malloc(sizeof(double) * nnn); + if (!bufferhere) + { + cout << "on node#" << myrank << ", out of memory when dumping data." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta); + DX = BP->getdX(0); + DY = BP->getdX(1); + DZ = BP->getdX(2); + llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; + llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; + llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; + uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; + uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; + uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? 
BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; + f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub); + free(bufferhere); + } + else if (myrank == BP->rank) + { + MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); + } + } + if (Bp == PP->ble) + break; + Bp = Bp->next; + } + if (myrank == 0) + { + + string out_dir; + map::iterator iter; + iter = parameters::str_par.find("output dir"); + if (iter != parameters::str_par.end()) + { + out_dir = iter->second; + } + else + { + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good()) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "output dir") + out_dir = sval; + } + } + inf.close(); + + parameters::str_par.insert(map::value_type("output dir", out_dir)); + } + + char filename[100]; + if (tag) + sprintf(filename, "%s/%s_Lev%02d-%02d_%s_%05d.bin", out_dir.c_str(), tag, PP->lev, grd, VP->name, ncount); + else + sprintf(filename, "%s/Lev%02d-%02d_%s_%05d.bin", out_dir.c_str(), PP->lev, grd, VP->name, ncount); + + writefile(time, PP->shape[0], PP->shape[1], PP->shape[2], PP->bbox[0], PP->bbox[3], PP->bbox[1], PP->bbox[4], + PP->bbox[2], PP->bbox[5], filename, databuffer); + } + DumpList = 
DumpList->next; + } + + if (myrank == 0) + free(databuffer); +} +void Parallel::Dump_Data(MyList *PL, MyList *DumpList, char *tag, double time, double dT) +{ + MyList *Pp; + Pp = PL; + int grd = 0; + while (Pp) + { + Patch *PP = Pp->data; + Dump_Data(PP, DumpList, tag, time, dT, grd); + grd++; + Pp = Pp->next; + } +} +// collect the data including buffer points +double *Parallel::Collect_Data(Patch *PP, var *VP) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + MPI_Status sta; + int DIM = 3; + double llb[3], uub[3]; + double DX, DY, DZ; + + double *databuffer = 0; + if (myrank == 0) + { + databuffer = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1] * PP->shape[2]); + if (!databuffer) + { + cout << "Parallel::Collect_Data: out of memory when dumping data." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + MyList *Bp = PP->blb; + while (Bp) + { + Block *BP = Bp->data; + if (BP->rank == 0 && myrank == 0) + { + DX = BP->getdX(0); + DY = BP->getdX(1); + DZ = BP->getdX(2); + llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; + llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; + llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; + uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; + uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; + uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; + f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub); + } + else + { + int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); + if (myrank == 0) + { + double *bufferhere = (double *)malloc(sizeof(double) * nnn); + if (!bufferhere) + { + cout << "on node#" << myrank << ", out of memory when dumping data." 
<< endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta); + DX = BP->getdX(0); + DY = BP->getdX(1); + DZ = BP->getdX(2); + llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; + llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; + llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; + uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; + uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; + uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; + f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub); + free(bufferhere); + } + else if (myrank == BP->rank) + { + MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); + } + } + if (Bp == PP->ble) + break; + Bp = Bp->next; + } + + return databuffer; +} +// Now we dump the data including buffer points +// dump z = 0 plane +void Parallel::d2Dump_Data(Patch *PP, MyList *DumpList, char *tag, double time, double dT, int grd) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // round at 4 and 5 + int ncount = int(time / dT + 0.5); + + MPI_Status sta; + int DIM = 3; + double llb[3], uub[3]; + double DX, DY, DZ; + + double *databuffer = 0, *databuffer2 = 0; + if (myrank == 0) + { + databuffer = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1] * PP->shape[2]); + databuffer2 = (double *)malloc(sizeof(double) * PP->shape[0] * PP->shape[1]); + if (!databuffer || !databuffer2) + { + cout << "Parallel::d2Dump_Data: out of memory when dumping data." 
<< endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + while (DumpList) + { + var *VP = DumpList->data; + + MyList *Bp = PP->blb; + while (Bp) + { + Block *BP = Bp->data; + if (BP->rank == 0 && myrank == 0) + { + DX = BP->getdX(0); + DY = BP->getdX(1); + DZ = BP->getdX(2); + llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; + llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; + llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; + uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; + uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; + uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; + f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub); + } + else + { + int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); + if (myrank == 0) + { + double *bufferhere = (double *)malloc(sizeof(double) * nnn); + if (!bufferhere) + { + cout << "on node#" << myrank << ", out of memory when dumping data." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta); + DX = BP->getdX(0); + DY = BP->getdX(1); + DZ = BP->getdX(2); + llb[0] = (feq(BP->bbox[0], PP->bbox[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; + llb[1] = (feq(BP->bbox[1], PP->bbox[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; + llb[2] = (feq(BP->bbox[2], PP->bbox[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; + uub[0] = (feq(BP->bbox[3], PP->bbox[3], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; + uub[1] = (feq(BP->bbox[4], PP->bbox[4], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; + uub[2] = (feq(BP->bbox[5], PP->bbox[5], DZ / 2)) ? 
BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; + f_copy(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub); + free(bufferhere); + } + else if (myrank == BP->rank) + { + MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); + } + } + if (Bp == PP->ble) + break; + Bp = Bp->next; + } + if (myrank == 0) + { + + string out_dir; + map::iterator iter; + iter = parameters::str_par.find("output dir"); + if (iter != parameters::str_par.end()) + { + out_dir = iter->second; + } + else + { + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good()) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "output dir") + out_dir = sval; + } + } + inf.close(); + + parameters::str_par.insert(map::value_type("output dir", out_dir)); + } + + char filename[100]; + if (tag) + sprintf(filename, "%s/%s_2d_Lev%02d-%02d_%s_%05d.dat", out_dir.c_str(), tag, PP->lev, grd, VP->name, ncount); + else + sprintf(filename, "%s/2d_Lev%02d-%02d_%s_%05d.dat", out_dir.c_str(), PP->lev, grd, VP->name, ncount); + + int gord = ghost_width; + f_d2dump(DIM, PP->bbox, PP->bbox + DIM, PP->shape, databuffer, databuffer2, gord, VP->SoA); + writefile(time, PP->shape[0], PP->shape[1], 
PP->bbox[0], PP->bbox[3], PP->bbox[1], PP->bbox[4], + filename, databuffer2); + } + DumpList = DumpList->next; + } + + if (myrank == 0) + { + free(databuffer); + free(databuffer2); + } +} +void Parallel::d2Dump_Data(MyList *PL, MyList *DumpList, char *tag, double time, double dT) +{ + MyList *Pp; + Pp = PL; + int grd = 0; + while (Pp) + { + Patch *PP = Pp->data; + d2Dump_Data(PP, DumpList, tag, time, dT, grd); + grd++; + Pp = Pp->next; + } +} +// Now we dump the data including buffer points and ghost points of the given patch +void Parallel::Dump_Data0(Patch *PP, MyList *DumpList, char *tag, double time, double dT) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // round at 4 and 5 + int ncount = int(time / dT + 0.5); + + MPI_Status sta; + int DIM = 3; + double llb[3], uub[3], tllb[3], tuub[3]; + int tshape[3]; + double DX, DY, DZ; + + for (int i = 0; i < 3; i++) + { + double DX = PP->blb->data->getdX(i); + tshape[i] = PP->shape[i] + 2 * ghost_width; + tllb[i] = PP->bbox[i] - ghost_width * DX; + tuub[i] = PP->bbox[i + dim] + ghost_width * DX; + } + + int NN = tshape[0] * tshape[1] * tshape[2]; + double *databuffer = 0; + if (myrank == 0) + { + databuffer = (double *)malloc(sizeof(double) * NN); + if (!databuffer) + { + cout << "on node# " << myrank << ", out of memory when dumping data." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + while (DumpList) + { + var *VP = DumpList->data; + MyList *Bp = PP->blb; + while (Bp) + { + Block *BP = Bp->data; + if (BP->rank == 0 && myrank == 0) + { + DX = BP->getdX(0); + DY = BP->getdX(1); + DZ = BP->getdX(2); + llb[0] = (feq(BP->bbox[0], tllb[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; + llb[1] = (feq(BP->bbox[1], tllb[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; + llb[2] = (feq(BP->bbox[2], tllb[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; + uub[0] = (feq(BP->bbox[3], tuub[0], DX / 2)) ? 
BP->bbox[3] : BP->bbox[3] - ghost_width * DX; + uub[1] = (feq(BP->bbox[4], tuub[1], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; + uub[2] = (feq(BP->bbox[5], tuub[2], DZ / 2)) ? BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; + f_copy(DIM, tllb, tuub, tshape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, BP->fgfs[VP->sgfn], llb, uub); + } + else + { + if (myrank == 0) + { + int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); + double *bufferhere = (double *)malloc(sizeof(double) * nnn); + if (!bufferhere) + { + cout << "on node#" << myrank << ", out of memory when dumping data." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + MPI_Recv(bufferhere, nnn, MPI_DOUBLE, BP->rank, 0, MPI_COMM_WORLD, &sta); + DX = BP->getdX(0); + DY = BP->getdX(1); + DZ = BP->getdX(2); + llb[0] = (feq(BP->bbox[0], tllb[0], DX / 2)) ? BP->bbox[0] : BP->bbox[0] + ghost_width * DX; + llb[1] = (feq(BP->bbox[1], tllb[1], DY / 2)) ? BP->bbox[1] : BP->bbox[1] + ghost_width * DY; + llb[2] = (feq(BP->bbox[2], tllb[2], DZ / 2)) ? BP->bbox[2] : BP->bbox[2] + ghost_width * DZ; + uub[0] = (feq(BP->bbox[3], tuub[0], DX / 2)) ? BP->bbox[3] : BP->bbox[3] - ghost_width * DX; + uub[1] = (feq(BP->bbox[4], tuub[1], DY / 2)) ? BP->bbox[4] : BP->bbox[4] - ghost_width * DY; + uub[2] = (feq(BP->bbox[5], tuub[2], DZ / 2)) ? 
BP->bbox[5] : BP->bbox[5] - ghost_width * DZ; + f_copy(DIM, tllb, tuub, tshape, databuffer, BP->bbox, BP->bbox + DIM, BP->shape, bufferhere, llb, uub); + free(bufferhere); + } + else if (myrank == BP->rank) + { + int nnn = (BP->shape[0]) * (BP->shape[1]) * (BP->shape[2]); + MPI_Send(BP->fgfs[VP->sgfn], nnn, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); + } + } + if (Bp == PP->ble) + break; + Bp = Bp->next; + } + if (myrank == 0) + { + + string out_dir; + map::iterator iter; + iter = parameters::str_par.find("output dir"); + if (iter != parameters::str_par.end()) + { + out_dir = iter->second; + } + else + { + // read parameter from file + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good()) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "ABE") + { + if (skey == "output dir") + out_dir = sval; + } + } + inf.close(); + + parameters::str_par.insert(map::value_type("output dir", out_dir)); + } + + char filename[100]; + if (tag) + sprintf(filename, "%s/%s_Lev%02d_%s_%05d.bin", out_dir.c_str(), tag, PP->lev, VP->name, ncount); + else + sprintf(filename, "%s/Lev%02d_%s_%05d.bin", out_dir.c_str(), PP->lev, VP->name, ncount); + + writefile(time, tshape[0], tshape[1], tshape[2], tllb[0], tuub[0], tllb[1], tuub[1], /* extents are per-axis (lower, upper) pairs, in the same order Dump_Data passes them */ + tllb[2], tuub[2], filename, databuffer); + } + DumpList = 
DumpList->next; + } + + if (myrank == 0) + free(databuffer); +} +// Map point is much easier than maping data itself +// But the main problem is about the points near the boundary +// worst case is -ghost -ghost+1 .... 0 * ...... +double Parallel::global_interp(int DIM, int *ext, double **CoX, double *datain, + double *poXb, int ordn, double *SoA, int Symmetry) +{ + if (DIM != 3) + { + cout << "Parallel::global_interp does not suport DIM = " << DIM << " for Symmetry." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + double resu; + double poX[3]; + double asgn = 1; + + for (int i = 0; i < 3; i++) + poX[i] = poXb[i]; + + switch (Symmetry) + { + case 2: + for (int i = 0; i < 3; i++) + if (poX[i] < 0) + { + poX[i] = -poX[i]; + asgn = asgn * SoA[i]; + } + break; + case 1: + if (poX[2] < 0) + { + poX[2] = -poX[2]; + asgn = asgn * SoA[2]; + } + } + + int extb[3]; + + for (int i = 0; i < 3; i++) + extb[i] = ext[i]; + + switch (Symmetry) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + case 2: + if (poX[0] < (ghost_width - 1) * (CoX[0][1] - CoX[0][0])) + extb[0] = extb[0] + ghost_width - 1; + if (poX[1] < (ghost_width - 1) * (CoX[1][1] - CoX[1][0])) + extb[1] = extb[1] + ghost_width - 1; + case 1: + if (poX[2] < (ghost_width - 1) * (CoX[2][1] - CoX[2][0])) + extb[2] = extb[2] + ghost_width - 1; +#else +#ifdef Cell + case 2: + if (poX[0] < (ghost_width - 0.5) * (CoX[0][1] - CoX[0][0])) + extb[0] = extb[0] + ghost_width; + if (poX[1] < (ghost_width - 0.5) * (CoX[1][1] - CoX[1][0])) + extb[1] = extb[1] + ghost_width; + case 1: + if (poX[2] < (ghost_width - 0.5) * (CoX[2][1] - CoX[2][0])) + extb[2] = extb[2] + ghost_width; +#else +#error Not define Vertex nor Cell +#endif +#endif + } + + if (extb[0] > ext[0] || extb[1] > ext[1] || extb[2] > ext[2]) + { + double *CoXb[3]; + int Nb = extb[0] * extb[1] * extb[2]; + double *datab; + datab = new double[Nb]; + for (int i = 0; i < 3; i++) + { + CoXb[i] = new double[extb[i]]; + double DH = CoX[i][1] - 
CoX[i][0]; + if (extb[i] > ext[i]) + { + if (CoX[i][0] > DH) + { + cout << "lower boundary[" << i << "] = " << CoX[i][0] << ", but SYmmetry = " << Symmetry << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + for (int j = 0; j < ghost_width - 1; j++) + CoXb[i][j] = -CoX[i][ghost_width - 1 - j]; + for (int j = ghost_width - 1; j < extb[i]; j++) + CoXb[i][j] = CoX[i][j - ghost_width + 1]; +#else +#ifdef Cell + for (int j = 0; j < ghost_width; j++) + CoXb[i][j] = -CoX[i][ghost_width - 1 - j]; + for (int j = ghost_width; j < extb[i]; j++) + CoXb[i][j] = CoX[i][j - ghost_width]; +#else +#error Not define Vertex nor Cell +#endif +#endif + } + else + { + for (int j = 0; j < extb[i]; j++) + CoXb[i][j] = CoX[i][j]; + } + } + + for (int i = 0; i < Nb; i++) + { + int ind[3], indb[3]; + getarrayindex(3, extb, indb, i); + double sgn = 1; + for (int j = 0; j < 3; j++) + { + if (extb[j] > ext[j]) + { +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + if (indb[j] < ghost_width - 1) + { + ind[j] = ghost_width - 1 - indb[j]; + sgn = sgn * SoA[j]; + } + else + { + ind[j] = 1 + indb[j] - ghost_width; + } +#else +#ifdef Cell + if (indb[j] < ghost_width) + { + ind[j] = ghost_width - 1 - indb[j]; + sgn = sgn * SoA[j]; + } + else + { + ind[j] = indb[j] - ghost_width; + } +#else +#error Not define Vertex nor Cell +#endif +#endif + } + else + ind[j] = indb[j]; + } + int lon = getarraylocation(3, ext, ind); + datab[i] = datain[lon] * sgn; + } + + resu = global_interp(DIM, extb, CoXb, datab, poX, ordn); + + for (int i = 0; i < 3; i++) + delete[] CoXb[i]; + delete[] datab; + } + else + { + resu = global_interp(DIM, ext, CoX, datain, poX, ordn); + } + + return resu * asgn; +} +double Parallel::global_interp(int DIM, int *ext, double **CoX, double *datain, + double *poX, int ordn) +{ + if (ordn > 2 * ghost_width) + { + cout << "Parallel::global_interp can not handle ordn = " << ordn << " > 
2*ghost_width = " << 2 * ghost_width << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + double *bbox, *datainbbox; + bbox = new double[2 * DIM]; + datainbbox = new double[2 * DIM]; + + int *NN, *ind, *shape; + NN = new int[DIM]; + ind = new int[DIM]; + shape = new int[DIM]; + + for (int i = 0; i < DIM; i++) + { + ind[i] = int((poX[i] - CoX[i][0]) / (CoX[i][1] - CoX[i][0])) - ordn / 2 + 1; + // poX may exactly locate on the boundary (exclude ghost) + if (ind[i] == -1 && feq(poX[i], CoX[i][0], (CoX[i][1] - CoX[i][0]) / 2)) + ind[i] = 0; + /* + if(ind[i] < 0) + { + cout<<"Parallel::global_interp error ind["< ext = "<= 0; i--) + NN[i] = NN[i + 1] * ordn; + + double *xpts, *funcvals; + xpts = new double[ordn]; + funcvals = new double[ordn]; + double *DDd, *DDd1, rr; + + DDd = new double[NN[0]]; + + copy(DIM, bbox, bbox + DIM, shape, DDd, datainbbox, datainbbox + DIM, ext, datain, bbox, bbox + DIM); + + for (int i = 0; i < DIM; i++) + { + for (int j = ind[i]; j < ind[i] + ordn; j++) + { + xpts[j - ind[i]] = CoX[i][j]; + } + + if (i < DIM - 1) + { + DDd1 = new double[NN[i + 1]]; + for (int j = 0; j < NN[i + 1]; j++) + { + for (int k = 0; k < ordn; k++) + funcvals[k] = DDd[k + j * ordn]; + DDd1[j] = Lagrangian_Int(poX[i], ordn, xpts, funcvals); + } + delete[] DDd; + DDd = DDd1; + } + else + { + for (int j = 0; j < ordn; j++) + funcvals[j] = DDd[j]; + rr = Lagrangian_Int(poX[i], ordn, xpts, funcvals); + delete[] DDd1; // since DDd and DDd1 now point to the same stuff, we need delete after above int + } + } + + delete[] NN; + delete[] ind; + delete[] xpts; + delete[] funcvals; + delete[] bbox; + delete[] datainbbox; + delete[] shape; + + return rr; +} +double Parallel::Lagrangian_Int(double x, int npts, double *xpts, double *funcvals) +{ + double sum = 0; + for (int i = 0; i < npts; i++) + { + sum = sum + funcvals[i] * LagrangePoly(x, i, npts, xpts); + } + return sum; +} +double Parallel::LagrangePoly(double x, int pt, int npts, double *xpts) +{ + double h = 1; + int i; + + 
for (i = 0; i < pt; i++) + h = h * (x - xpts[i]) / (xpts[pt] - xpts[i]); + + for (i = pt + 1; i < npts; i++) + h = h * (x - xpts[i]) / (xpts[pt] - xpts[i]); + + return h; +} +// collect all grid segments or blocks including ghost and buffer for given patch +MyList *Parallel::build_complete_gsl(Patch *Pat) +{ + MyList *cgsl = 0, *gs; + MyList *BP = Pat->blb; + while (BP) + { + if (!cgsl) + { + cgsl = gs = new MyList; // delete through destroyList(); + gs->data = new Parallel::gridseg; + } + else + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + gs->data->llb[i] = BP->data->bbox[i]; + gs->data->uub[i] = BP->data->bbox[dim + i]; + gs->data->shape[i] = BP->data->shape[i]; + } + gs->data->Bg = BP->data; + gs->next = 0; + + if (BP == Pat->ble) + break; + BP = BP->next; + } + + return cgsl; +} +// collect all grid segments or blocks including ghost and buffer for given patch list +MyList *Parallel::build_complete_gsl(MyList *PatL) +{ + MyList *cgsl = 0, *gs; + while (PatL) + { + if (!cgsl) + { + cgsl = build_complete_gsl(PatL->data); + gs = cgsl; + while (gs->next) + gs = gs->next; + } + else + { + gs->next = build_complete_gsl(PatL->data); + gs = gs->next; + while (gs->next) + gs = gs->next; + } + PatL = PatL->next; + } + + return cgsl; +} +// cellect the information of Patch list +MyList *Parallel::build_complete_gsl_virtual(MyList *PatL) +{ + MyList *cgsl = 0, *gs; + while (PatL) + { + if (cgsl) + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + else + { + cgsl = gs = new MyList; + gs->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + gs->data->llb[i] = PatL->data->bbox[i]; + gs->data->uub[i] = PatL->data->bbox[dim + i]; + gs->data->shape[i] = PatL->data->shape[i]; + } + gs->data->Bg = 0; + gs->next = 0; + + PatL = PatL->next; + } + + return cgsl; +} +// cellect the information of Patch list without buffer points +MyList 
*Parallel::build_complete_gsl_virtual2(MyList *PatL) // - buffer +{ + MyList *cgsl = 0, *gs; + while (PatL) + { + if (cgsl) + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + else + { + cgsl = gs = new MyList; + gs->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + double DH = PatL->data->getdX(i); + gs->data->llb[i] = PatL->data->bbox[i] + PatL->data->lli[i] * DH; + gs->data->uub[i] = PatL->data->bbox[dim + i] - PatL->data->uui[i] * DH; + gs->data->shape[i] = PatL->data->shape[i] - PatL->data->lli[i] - PatL->data->uui[i]; + } + gs->data->Bg = 0; + gs->next = 0; + + PatL = PatL->next; + } + + return cgsl; +} +// collect all grid segments or blocks without ghost for given patch, without extension +MyList *Parallel::build_bulk_gsl(Patch *Pat) +{ + MyList *cgsl = 0, *gs; + MyList *BP = Pat->blb; + while (BP) + { + Block *bp = BP->data; + if (!cgsl) + { + cgsl = gs = new MyList; + gs->data = new Parallel::gridseg; + } + else + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + double DH = bp->getdX(i); + gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH; + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? 
bp->bbox[i] : bp->bbox[i] + ghost_width * DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + gs->data->Bg = BP->data; + gs->next = 0; + + if (BP == Pat->ble) + break; + BP = BP->next; + } + + return cgsl; +} +// bulk part for given Block within given patch, without extension +MyList *Parallel::build_bulk_gsl(Block *bp, Patch *Pat) +{ + MyList *gs = 0; + + gs = new MyList; + gs->data = new Parallel::gridseg; + + for (int i = 0; i < dim; i++) + { + double DH = bp->getdX(i); + gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH; + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + gs->data->Bg = bp; + gs->next = 0; + + return gs; +} +MyList *Parallel::clone_gsl(MyList *p, bool first_only) +{ + MyList *np = 0, *q = 0, *pq = 0; + + while (p) + { + q = new MyList; + q->data = new Parallel::gridseg; + q->data->Bg = p->data->Bg; + for (int i = 0; i < dim; i++) + { + q->data->llb[i] = p->data->llb[i]; + q->data->uub[i] = p->data->uub[i]; + q->data->shape[i] = p->data->shape[i]; + } + if (pq) + pq->next = q; + else + np = q; + if (first_only) + { + np->next = 0; + return np; + } + pq = q; + p = p->next; + } + return np; +} +MyList *Parallel::gs_subtract(MyList *A, MyList *B) +{ + if (!A) + return 0; + if (!B) + return clone_gsl(A, true); + + double 
cut_plane[2 * dim], DH[dim]; + + for (int i = 0; i < dim; i++) + { + DH[i] = A->data->Bg->getdX(i); + if (B->data->Bg && !feq(DH[i], B->data->Bg->getdX(i), DH[i] / 2)) + { + cout << "Parallel::gs_subtract meets different grid segment " << DH[i] << " vs " << B->data->Bg->getdX(i) << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + MyList *C = 0, *q; + for (int i = 0; i < dim; i++) + { + if (B->data->llb[i] > A->data->uub[i] || B->data->uub[i] < A->data->llb[i]) + return clone_gsl(A, true); + cut_plane[i] = A->data->llb[i]; + cut_plane[i + dim] = A->data->uub[i]; + } + + for (int i = 0; i < dim; i++) + { + cut_plane[i] = Mymax(A->data->llb[i], B->data->llb[i]); + if (cut_plane[i] - A->data->llb[i] > DH[i] / 2) + { + q = clone_gsl(A, true); + // prolong the list from head + if (C) + q->next = C; + C = q; + for (int j = 0; j < dim; j++) + { + if (i == j) + { + C->data->llb[i] = A->data->llb[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + C->data->uub[i] = Mymax(C->data->llb[i], cut_plane[i] - DH[i]); +#else +#ifdef Cell + C->data->uub[i] = Mymax(C->data->llb[i], cut_plane[i]); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + else + { + C->data->llb[j] = cut_plane[j]; + C->data->uub[j] = cut_plane[j + dim]; + } +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1; +#else +#ifdef Cell + C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + } + + cut_plane[i + dim] = Mymin(A->data->uub[i], B->data->uub[i]); + if (A->data->uub[i] - cut_plane[i + dim] > DH[i] / 2) + { + q = clone_gsl(A, true); + if (C) + q->next = C; + C = q; + for (int j = 0; j < dim; j++) + { + if (i == j) + { + C->data->uub[i] = A->data->uub[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + C->data->llb[i] = Mymin(C->data->uub[i], 
cut_plane[i + dim] + DH[i]); +#else +#ifdef Cell + C->data->llb[i] = Mymin(C->data->uub[i], cut_plane[i + dim]); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + else + { + C->data->llb[j] = cut_plane[j]; + C->data->uub[j] = cut_plane[j + dim]; + } +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1; +#else +#ifdef Cell + C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + } + } + return C; +} +// stupid method +/* +MyList *Parallel::gsl_subtract(MyList *A,MyList *B) //A subtract B but with A's information +{ +// always make return and A, B distinct + if(!A) return 0; + + if(!B) return clone_gsl(A,0); + + MyList *C=0,*C0,*C1,*Cc,*CC0,*gs; + + while(A) + { + C0=gs_subtract(A,B); // note C0 becomes a list after subtraction + C1=B->next; + while(C1) + { + CC0=C0; + Cc=0; + while(CC0) + { + gs=gs_subtract(CC0,C1); + if(Cc) Cc->catList(gs); + else Cc=gs; + CC0=CC0->next; + } + if(C0) C0->destroyList(); + C0=Cc; + C1=C1->next; + } + if(C) C->catList(C0); + else C=C0; + A=A->next; + } + + return C; +} +*/ +// more clever method +MyList *Parallel::gsl_subtract(MyList *A, MyList *B) // A subtract B but with A's information +{ + // always make return and A, B distinct + if (!A) + return 0; + + MyList *C = 0, *C0, *C1; + + C = clone_gsl(A, 0); + + while (B) + { + C0 = 0; + C1 = C; + while (C1) + { + if (C0) + C0->catList(gs_subtract(C1, B)); + else + C0 = gs_subtract(C1, B); + C1 = C1->next; + } + if (C) + C->destroyList(); + else + { + if (C0) + C0->destroyList(); + return 0; + } + + C = C0; + B = B->next; + } + + return C; +} +MyList *Parallel::gs_and(MyList *A, MyList *B) +{ + if (!A || !B) + return 0; + + double llb[dim], uub[dim]; + bool flag = false; + for (int i = 0; i < dim; i++) + { + llb[i] = Mymax(A->data->llb[i], B->data->llb[i]); + uub[i] = 
Mymin(A->data->uub[i], B->data->uub[i]); + if (llb[i] > uub[i]) + { + flag = true; + break; + } + } + if (flag) + return 0; + + MyList *C; + C = clone_gsl(A, true); + for (int i = 0; i < dim; i++) + { + C->data->llb[i] = llb[i]; + C->data->uub[i] = uub[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / C->data->Bg->getdX(i) + 0.4) + 1; +#else +#ifdef Cell + C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / C->data->Bg->getdX(i) + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + + return C; +} +// overlap of A_i and (union of all j of B_j) +MyList *Parallel::gsl_and(MyList *A, MyList *B) // A and B but with A's information +{ + MyList *C = 0, *C1; + + while (A) + { + C1 = B; + while (C1) + { + if (C) + C->catList(gs_and(A, C1)); + else + C = gs_and(A, C1); + C1 = C1->next; + } + A = A->next; + } + return C; +} +// collect all ghost grid segments or blocks for given patch +MyList *Parallel::build_ghost_gsl(Patch *Pat) +{ + MyList *cgsl = 0, *gs, *gsb; + MyList *BP = Pat->blb; + while (BP) + { + gs = new MyList; + gs->data = new Parallel::gridseg; + + for (int i = 0; i < dim; i++) + { + gs->data->llb[i] = BP->data->bbox[i]; + gs->data->uub[i] = BP->data->bbox[dim + i]; + gs->data->shape[i] = BP->data->shape[i]; + } + gs->data->Bg = BP->data; + gs->next = 0; + + gsb = build_bulk_gsl(BP->data, Pat); + + if (!cgsl) + cgsl = gs_subtract(gs, gsb); + else + cgsl->catList(gs_subtract(gs, gsb)); + + gsb->destroyList(); + gs->destroyList(); + + if (BP == Pat->ble) + break; + BP = BP->next; + } + + return cgsl; +} +// collect all ghost grid segments or blocks for given patch list +MyList *Parallel::build_ghost_gsl(MyList *PatL) +{ + MyList *cgsl = 0, *gs; + while (PatL) + { + if (!cgsl) + { + cgsl = build_ghost_gsl(PatL->data); + gs = cgsl; + while (gs->next) + gs = gs->next; + } + else + { + gs->next = build_ghost_gsl(PatL->data); + gs = gs->next; + 
while (gs->next) + gs = gs->next; + } + PatL = PatL->next; + } + + return cgsl; +} +// collect all grid segments or blocks without ghost for given patch +// special for Sync usage, so we do not need consider missing points +MyList *Parallel::build_owned_gsl0(Patch *Pat, int rank_in) +{ + MyList *cgsl = 0, *gs; + MyList *BP = Pat->blb; + while (BP) + { + Block *bp = BP->data; + if (bp->rank == rank_in) + { + if (!cgsl) + { + cgsl = gs = new MyList; + gs->data = new Parallel::gridseg; + } + else + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + double DH = bp->getdX(i); + gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH; + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + gs->data->Bg = BP->data; + gs->next = 0; + } + + if (BP == Pat->ble) + break; + BP = BP->next; + } + + return cgsl; +} +// collect all grid segments or blocks without ghost for given patch +MyList *Parallel::build_owned_gsl1(Patch *Pat, int rank_in) +{ + MyList *cgsl = 0, *gs; + MyList *BP = Pat->blb; + while (BP) + { + Block *bp = BP->data; + if (bp->rank == rank_in) + { + if (!cgsl) + { + cgsl = gs = new MyList; + gs->data = new Parallel::gridseg; + } + else + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + double DH = bp->getdX(i); + gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? 
bp->bbox[dim + i] : bp->bbox[dim + i] - ghost_width * DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + // NOTE: our dividing structure is (exclude ghost) + // -1 0 + // 1 2 + // so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to + // the fortran routine where we always take floor to get index + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + (ghost_width - 1) * DH; + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] : bp->bbox[i] + ghost_width * DH; + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + gs->data->Bg = BP->data; + gs->next = 0; + } + + if (BP == Pat->ble) + break; + BP = BP->next; + } + + return cgsl; +} +// collect all grid segments or blocks without ghost nor buffer for given patch +MyList *Parallel::build_owned_gsl2(Patch *Pat, int rank_in) +{ + MyList *cgsl = 0, *gs; + MyList *BP = Pat->blb; + while (BP) + { + Block *bp = BP->data; + if (bp->rank == rank_in) + { + if (!cgsl) + { + cgsl = gs = new MyList; + gs->data = new Parallel::gridseg; + } + else + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + double DH = bp->getdX(i); + gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? 
bp->bbox[dim + i] - Pat->uui[i] * DH : bp->bbox[dim + i] - ghost_width * DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + // NOTE: our dividing structure is (exclude ghost) + // -1 0 + // 1 2 + // so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to + // the fortran routine where we always take floor to get index + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i] + (ghost_width - 1) * DH; + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i] + ghost_width * DH; + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + gs->data->Bg = BP->data; + gs->next = 0; + } + + if (BP == Pat->ble) + break; + BP = BP->next; + } + + return cgsl; +} +// collect all grid segments or blocks without ghost for given patch, and delete the ghost_width for interpolation consideration on the patch boundary +MyList *Parallel::build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry) +{ + MyList *cgsl = 0, *gs; + MyList *BP = Pat->blb; + while (BP) + { + Block *bp = BP->data; + if (bp->rank == rank_in) + { + if (!cgsl) + { + cgsl = gs = new MyList; + gs->data = new Parallel::gridseg; + } + else + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + double DH = bp->getdX(i); + gs->data->uub[i] = bp->bbox[dim + i] - ghost_width * DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + // NOTE: our dividing structure is (exclude ghost) + // -1 0 + // 1 2 + // so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to + // the fortran routine where we always 
take floor to get index + gs->data->llb[i] = bp->bbox[i] + (ghost_width - 1) * DH; + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->llb[i] = bp->bbox[i] + ghost_width * DH; + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + // Symmetry consideration + if (Symmetry > 0) + { + double DH = bp->getdX(2); + if (feq(bp->bbox[2], 0, DH / 2)) + { + gs->data->llb[2] = bp->bbox[2]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + if (Symmetry > 1) + { + for (int i = 0; i < 2; i++) + { + DH = bp->getdX(i); + if (feq(bp->bbox[i], 0, DH / 2)) + { + gs->data->llb[i] = bp->bbox[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + } + } + } + + gs->data->Bg = BP->data; + gs->next = 0; + } + + if (BP == Pat->ble) + break; + BP = BP->next; + } + + return cgsl; +} +// collect all grid segments or blocks without ghost nor buffer for given patch, +// and delete the ghost_width for interpolation consideration on the patch boundary +MyList *Parallel::build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry) +{ + MyList *cgsl = 0, *gs; + MyList *BP = Pat->blb; + while (BP) + { + Block *bp = BP->data; + if (bp->rank == rank_in) + { + if (!cgsl) + { + cgsl = gs = new MyList; + gs->data = new Parallel::gridseg; + } + else + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } 
+ + for (int i = 0; i < dim; i++) + { + double DH = bp->getdX(i); + gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] - Pat->uui[i] * DH : bp->bbox[dim + i]; + gs->data->uub[i] -= ghost_width * DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + // NOTE: our dividing structure is (exclude ghost) + // -1 0 + // 1 2 + // so (0,1) does not belong to any part for vertex structure, we always put it to right part, this is consistent to + // the fortran routine where we always take floor to get index + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i]; + gs->data->llb[i] += (ghost_width - 1) * DH; + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i]; + gs->data->llb[i] += ghost_width * DH; + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + // Symmetry consideration + if (Symmetry > 0) + { + double DH = bp->getdX(2); + if (feq(bp->bbox[2], 0, DH / 2)) + { + gs->data->llb[2] = bp->bbox[2]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->shape[2] = int((gs->data->uub[2] - gs->data->llb[2]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + if (Symmetry > 1) + { + for (int i = 0; i < 2; i++) + { + DH = bp->getdX(i); + if (feq(bp->bbox[i], 0, DH / 2)) + { + gs->data->llb[i] = bp->bbox[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 
0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + } + } + } + + gs->data->Bg = BP->data; + gs->next = 0; + } + + if (BP == Pat->ble) + break; + BP = BP->next; + } + + return cgsl; +} +// collect all grid segments or blocks without ghost nor buffer for given patch, no extention +MyList *Parallel::build_owned_gsl5(Patch *Pat, int rank_in) +{ + MyList *cgsl = 0, *gs; + MyList *BP = Pat->blb; + while (BP) + { + Block *bp = BP->data; + if (bp->rank == rank_in) + { + if (!cgsl) + { + cgsl = gs = new MyList; + gs->data = new Parallel::gridseg; + } + else + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + double DH = bp->getdX(i); + gs->data->uub[i] = (feq(bp->bbox[dim + i], Pat->bbox[dim + i], DH / 2)) ? bp->bbox[dim + i] - Pat->uui[i] * DH : bp->bbox[dim + i] - ghost_width * DH; + gs->data->llb[i] = (feq(bp->bbox[i], Pat->bbox[i], DH / 2)) ? bp->bbox[i] + Pat->lli[i] * DH : bp->bbox[i] + ghost_width * DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gs->data->shape[i] = int((gs->data->uub[i] - gs->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + gs->data->Bg = BP->data; + gs->next = 0; + } + + if (BP == Pat->ble) + break; + BP = BP->next; + } + + return cgsl; +} +// collect all grid segments or blocks without ghost for given patch list +// stupid method +/* +MyList *Parallel::build_owned_gsl(MyList *PatL,int rank_in,int type,int Symmetry) +{ + MyList *cgsl=0,*gs; + while(PatL) + { + if(!cgsl) + { + switch(type) + { + case 0: + cgsl = build_owned_gsl0(PatL->data,rank_in); + break; + case 1: + cgsl = build_owned_gsl1(PatL->data,rank_in); + break; + case 2: + cgsl = build_owned_gsl2(PatL->data,rank_in); + break; + case 3: + cgsl = build_owned_gsl3(PatL->data,rank_in,Symmetry); + break; + case 4: + cgsl = 
build_owned_gsl4(PatL->data,rank_in,Symmetry); + break; + case 5: + cgsl = build_owned_gsl5(PatL->data,rank_in); + break; + default: + cout<<"Parallel::build_owned_gsl : unknown type = "<next) gs = gs->next; + } + else + { + switch(type) + { + case 0: + gs->next = build_owned_gsl0(PatL->data,rank_in); + break; + case 1: + gs->next = build_owned_gsl1(PatL->data,rank_in); + break; + case 2: + gs->next = build_owned_gsl2(PatL->data,rank_in); + break; + case 3: + gs->next = build_owned_gsl3(PatL->data,rank_in,Symmetry); + break; + case 4: + gs->next = build_owned_gsl4(PatL->data,rank_in,Symmetry); + break; + case 5: + gs->next = build_owned_gsl5(PatL->data,rank_in); + break; + default: + cout<<"Parallel::build_owned_gsl : unknown type = "<next) gs = gs->next; + } + PatL = PatL->next; + } + + return cgsl; +} +*/ +// more clever method +MyList *Parallel::build_owned_gsl(MyList *PatL, int rank_in, int type, int Symmetry) +{ + MyList *cgsl = 0, *gs; + while (PatL) + { + switch (type) + { + case 0: + gs = build_owned_gsl0(PatL->data, rank_in); + break; + case 1: + gs = build_owned_gsl1(PatL->data, rank_in); + break; + case 2: + gs = build_owned_gsl2(PatL->data, rank_in); + break; + case 3: + gs = build_owned_gsl3(PatL->data, rank_in, Symmetry); + break; + case 4: + gs = build_owned_gsl4(PatL->data, rank_in, Symmetry); + break; + case 5: + gs = build_owned_gsl5(PatL->data, rank_in); + break; + default: + cout << "Parallel::build_owned_gsl : unknown type = " << type << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + if (cgsl) + cgsl->catList(gs); + else + cgsl = gs; + PatL = PatL->next; + } + + return cgsl; +} +// according to overlape to determine real grid segments +void Parallel::build_gstl(MyList *srci, MyList *dsti, + MyList **out_src, MyList **out_dst) +{ + *out_src = *out_dst = 0; + + if (!srci || !dsti) + return; + + MyList *s, *d; + MyList *s2, *d2; + + double llb[dim], uub[dim]; + + s = srci; + while (s) + { + Parallel::gridseg *sd = s->data; + d = dsti; + while (d) + { 
+ Parallel::gridseg *dd = d->data; + bool flag = true; + for (int i = 0; i < dim; i++) + { + double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i); + llb[i] = Mymax(sd->llb[i], dd->llb[i]); + uub[i] = Mymin(sd->uub[i], dd->uub[i]); + // make sure the region boundary is consistent to the grids + // here we only judge if the domain is empty, so do not need to adjust the align + double lb = llb[i], ub = uub[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + // ---*--- + // x-------x + // if (int(2*(sd->uub[i]-uub[i])/SH+0.4)%2 == 1) ub = uub[i]-SH/2; + // else if(int(2*(dd->uub[i]-uub[i])/DH+0.4)%2 == 1) ub = uub[i]-DH/2; + // if (int(2*(llb[i]-sd->llb[i])/SH+0.4)%2 == 1) lb = llb[i]+SH/2; + // else if(int(2*(llb[i]-dd->llb[i])/DH+0.4)%2 == 1) lb = llb[i]+DH/2; + if (lb > ub + Mymin(SH, DH) / 2) + { + flag = false; + break; + } // special for isolated point +#else +#ifdef Cell + // |------| + // |-------------| + // if (int(2*(sd->uub[i]-uub[i])/SH+0.4)%2 == 1) ub = uub[i]+SH/2; + // else if(int(2*(dd->uub[i]-uub[i])/DH+0.4)%2 == 1) ub = uub[i]+DH/2; + // |------| + // |-------------| + // if (int(2*(llb[i]-sd->llb[i])/SH+0.4)%2 == 1) lb = llb[i]-SH/2; + // else if(int(2*(llb[i]-dd->llb[i])/DH+0.4)%2 == 1) lb = llb[i]-DH/2; + if (ub - lb < Mymin(SH, DH) / 2) + { + flag = false; + break; + } // even for isolated point, it has a cell belong to it +#else +#error Not define Vertex nor Cell +#endif +#endif + } + + if (flag) + { + if (!(*out_src)) + { + *out_src = s2 = new MyList; + *out_dst = d2 = new MyList; + s2->data = new Parallel::gridseg; + d2->data = new Parallel::gridseg; + } + else + { + s2->next = new MyList; + s2 = s2->next; + d2->next = new MyList; + d2 = d2->next; + s2->data = new Parallel::gridseg; + d2->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i); + s2->data->llb[i] = d2->data->llb[i] = llb[i]; + s2->data->uub[i] = d2->data->uub[i] = uub[i]; +// 
using float method to count point, we do not need following consideration (2012 nov 17) +#if 1 + +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + // old code distuinguish vertex and cell + // if (int(2*(sd->uub[i]-uub[i])/SH+0.4)%2 == 1) s2->data->uub[i] = uub[i]-SH/2; + // else if(int(2*(dd->uub[i]-uub[i])/DH+0.4)%2 == 1) d2->data->uub[i] = uub[i]-DH/2; + // if (int(2*(llb[i]-sd->llb[i])/SH+0.4)%2 == 1) s2->data->llb[i] = llb[i]+SH/2; + // else if(int(2*(llb[i]-dd->llb[i])/DH+0.4)%2 == 1) d2->data->llb[i] = llb[i]+DH/2; + // new code: here we concern much more about missing point, because overlaping domain has been gaureented above + if (int(2 * (sd->uub[i] - uub[i]) / SH + 0.4) % 2 == 1) + s2->data->uub[i] = uub[i] + SH / 2; + else if (int(2 * (dd->uub[i] - uub[i]) / DH + 0.4) % 2 == 1) + d2->data->uub[i] = uub[i] + DH / 2; + if (int(2 * (llb[i] - sd->llb[i]) / SH + 0.4) % 2 == 1) + s2->data->llb[i] = llb[i] - SH / 2; + else if (int(2 * (llb[i] - dd->llb[i]) / DH + 0.4) % 2 == 1) + d2->data->llb[i] = llb[i] - DH / 2; + s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4) + 1; + d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + if (int(2 * (sd->uub[i] - uub[i]) / SH + 0.4) % 2 == 1) + s2->data->uub[i] = uub[i] + SH / 2; + else if (int(2 * (dd->uub[i] - uub[i]) / DH + 0.4) % 2 == 1) + d2->data->uub[i] = uub[i] + DH / 2; + if (int(2 * (llb[i] - sd->llb[i]) / SH + 0.4) % 2 == 1) + s2->data->llb[i] = llb[i] - SH / 2; + else if (int(2 * (llb[i] - dd->llb[i]) / DH + 0.4) % 2 == 1) + d2->data->llb[i] = llb[i] - DH / 2; + s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4); + d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + +#endif + s2->data->illb[i] = sd->illb[i]; + d2->data->illb[i] = dd->illb[i]; + s2->data->iuub[i] = sd->iuub[i]; + d2->data->iuub[i] = 
dd->iuub[i]; + } + s2->data->Bg = sd->Bg; + s2->next = 0; + d2->data->Bg = dd->Bg; + d2->next = 0; + } + d = d->next; + } + s = s->next; + } +} +// PACK: prepare target data in 'data' +// UNPACK: copy target data from 'data' to corresponding numerical grids +int Parallel::data_packer(double *data, MyList *src, MyList *dst, int rank_in, int dir, + MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int DIM = dim; + + if (dir != PACK && dir != UNPACK) + { + cout << "error dir " << dir << " for data_packer " << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + int size_out = 0; + + if (!src || !dst) + return size_out; + + MyList *varls, *varld; + + varls = VarLists; + varld = VarListd; + while (varls && varld) + { + varls = varls->next; + varld = varld->next; + } + + if (varls || varld) + { + cout << "error in short data packer, var lists does not match." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + int type; /* 1 copy, 2 restrict, 3 prolong */ + if (src->data->Bg->lev == dst->data->Bg->lev) + type = 1; + else if (src->data->Bg->lev > dst->data->Bg->lev) + type = 2; + else + type = 3; + + while (src && dst) + { + if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || + (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank)) + { + varls = VarLists; + varld = VarListd; + while (varls && varld) + { + if (data) + { + if (dir == PACK) + switch (type) + { + // attention must be paied to the difference between src's llb,uub and dst's llb,uub + case 1: + f_copy(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, + src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], + dst->data->llb, dst->data->uub); + break; + case 2: + f_restrict3(DIM, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, + src->data->Bg->bbox, src->data->Bg->bbox + dim, 
src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], + dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry); + break; + case 3: + f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], + dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, + dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry); + } + if (dir == UNPACK) // from target data to corresponding grid + f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn], + dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, + dst->data->llb, dst->data->uub); + } + size_out += dst->data->shape[0] * dst->data->shape[1] * dst->data->shape[2]; + varls = varls->next; + varld = varld->next; + } + } + dst = dst->next; + src = src->next; + } + + return size_out; +} +int Parallel::data_packermix(double *data, MyList *src, MyList *dst, int rank_in, int dir, + MyList *VarLists /* source */, MyList *VarListd /* target */, int Symmetry) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int DIM = dim; + + if (dir != PACK && dir != UNPACK) + { + cout << "Parallel::data_packermix: error dir " << dir << " for data_packermix." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + int size_out = 0; + + if (!src || !dst) + return size_out; + + MyList *varls, *varld; + + varls = VarLists; + varld = VarListd; + while (varls && varld) + { + varls = varls->next; + varld = varld->next; + } + + if (varls || varld) + { + cout << "error in short data packer, var lists does not match." << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + int type; /* 1 copy, 2 restrict, 3 prolong */ + if (src->data->Bg->lev == dst->data->Bg->lev) + type = 1; + else if (src->data->Bg->lev > dst->data->Bg->lev) + type = 2; + else + type = 3; + + if (type != 3) + { + cout << "Parallel::data_packermix: error type " << type << " for data_packermix." 
<< endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + while (src && dst) + { + if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || + (dir == UNPACK && src->data->Bg->rank == rank_in && dst->data->Bg->rank == myrank)) + { + varls = VarLists; + varld = VarListd; + while (varls && varld) + { + if (data) + { + if (dir == PACK) + f_prolongcopy3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], + dst->data->llb, dst->data->uub, src->data->shape, data + size_out, + src->data->llb, src->data->uub, varls->data->SoA, Symmetry); + if (dir == UNPACK) // from target data to corresponding grid + f_prolongmix3(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn], + src->data->llb, src->data->uub, src->data->shape, data + size_out, + dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry, dst->data->illb, dst->data->iuub); + } + // the symmetry problem should be dealt in prolongcopy3, + // so we always have ghost_width for both sides + size_out += (src->data->shape[0] + 2 * ghost_width) * (src->data->shape[1] + 2 * ghost_width) * (src->data->shape[2] + 2 * ghost_width); + varls = varls->next; + varld = varld->next; + } + } + dst = dst->next; + src = src->next; + } + + return size_out; +} +// +void Parallel::transfer(MyList **src, MyList **dst, + MyList *VarList1 /* source */, MyList *VarList2 /*target */, + int Symmetry) +{ + int myrank, cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int node; + + MPI_Request *reqs; + MPI_Status *stats; + reqs = new MPI_Request[2 * cpusize]; + stats = new MPI_Status[2 * cpusize]; + int req_no = 0; + + double **send_data, **rec_data; + send_data = new double *[cpusize]; + rec_data = new double *[cpusize]; + int length; + + for (node = 0; node < cpusize; node++) + { + send_data[node] = rec_data[node] = 0; + if (node == myrank) + { + if 
(length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry)) + { + rec_data[node] = new double[length]; + if (!rec_data[node]) + { + cout << "out of memory when new in short transfer, place 1" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + } + } + else + { + // send from this cpu to cpu#node + if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry)) + { + send_data[node] = new double[length]; + if (!send_data[node]) + { + cout << "out of memory when new in short transfer, place 2" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++); + } + // receive from cpu#node to this cpu + if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry)) + { + rec_data[node] = new double[length]; + if (!rec_data[node]) + { + cout << "out of memory when new in short transfer, place 3" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++); + } + } + } + // wait for all requests to complete + MPI_Waitall(req_no, reqs, stats); + + for (node = 0; node < cpusize; node++) + if (rec_data[node]) + data_packer(rec_data[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry); + + for (node = 0; node < cpusize; node++) + { + if (send_data[node]) + delete[] send_data[node]; + if (rec_data[node]) + delete[] rec_data[node]; + } + + delete[] reqs; + delete[] stats; + delete[] send_data; + delete[] rec_data; +} +// +void Parallel::transfermix(MyList **src, MyList **dst, + MyList *VarList1 /* source */, MyList *VarList2 /*target */, + int Symmetry) +{ + int myrank, cpusize; + MPI_Comm_size(MPI_COMM_WORLD, 
&cpusize); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int node; + + MPI_Request *reqs; + MPI_Status *stats; + reqs = new MPI_Request[2 * cpusize]; + stats = new MPI_Status[2 * cpusize]; + int req_no = 0; + + double **send_data, **rec_data; + send_data = new double *[cpusize]; + rec_data = new double *[cpusize]; + int length; + + for (node = 0; node < cpusize; node++) + { + send_data[node] = rec_data[node] = 0; + if (node == myrank) + { + if (length = data_packermix(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry)) + { + rec_data[node] = new double[length]; + if (!rec_data[node]) + { + cout << "out of memory when new in short transfer, place 1" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + data_packermix(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + } + } + else + { + // send from this cpu to cpu#node + if (length = data_packermix(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry)) + { + send_data[node] = new double[length]; + if (!send_data[node]) + { + cout << "out of memory when new in short transfer, place 2" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + data_packermix(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++); + } + // receive from cpu#node to this cpu + if (length = data_packermix(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry)) + { + rec_data[node] = new double[length]; + if (!rec_data[node]) + { + cout << "out of memory when new in short transfer, place 3" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++); + } + } + } + // wait for all requests to complete + MPI_Waitall(req_no, reqs, stats); + + for (node = 0; node < cpusize; node++) + if (rec_data[node]) + data_packermix(rec_data[node], src[node], dst[node], node, 
UNPACK, VarList1, VarList2, Symmetry); + + for (node = 0; node < cpusize; node++) + { + if (send_data[node]) + delete[] send_data[node]; + if (rec_data[node]) + delete[] rec_data[node]; + } + + delete[] reqs; + delete[] stats; + delete[] send_data; + delete[] rec_data; +} +void Parallel::Sync(Patch *Pat, MyList *VarList, int Symmetry) +{ + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_ghost_gsl(Pat); // ghost region only + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl0(Pat, node); // for the part without ghost points and do not extend + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer_src[node], data locate on cpu#node; + // but for transfer_dst[node] the data may locate on any node + } + + transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; +} +void Parallel::Sync(MyList *PatL, MyList *VarList, int Symmetry) +{ + // Patch inner Synch + MyList *Pp = PatL; + while (Pp) + { + Sync(Pp->data, VarList, Symmetry); + Pp = Pp->next; + } + + // Patch inter Synch + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_buffer_gsl(PatL); // buffer region only + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl(PatL, node, 5, Symmetry); // for the part without 
ghost nor buffer points and do not extend + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; +} +// Merged Sync: collect all intra-patch and inter-patch grid segment lists, +// then issue a single transfer() call instead of N+1 separate ones. +void Parallel::Sync_merged(MyList *PatL, MyList *VarList, int Symmetry) +{ + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList **combined_src = new MyList *[cpusize]; + MyList **combined_dst = new MyList *[cpusize]; + for (int node = 0; node < cpusize; node++) + combined_src[node] = combined_dst[node] = 0; + + // Phase A: Intra-patch ghost exchange segments + MyList *Pp = PatL; + while (Pp) + { + Patch *Pat = Pp->data; + MyList *dst_ghost = build_ghost_gsl(Pat); + + for (int node = 0; node < cpusize; node++) + { + MyList *src_owned = build_owned_gsl0(Pat, node); + MyList *tsrc = 0, *tdst = 0; + build_gstl(src_owned, dst_ghost, &tsrc, &tdst); + + if (tsrc) + { + if (combined_src[node]) + combined_src[node]->catList(tsrc); + else + combined_src[node] = tsrc; + } + if (tdst) + { + if (combined_dst[node]) + combined_dst[node]->catList(tdst); + else + combined_dst[node] = tdst; + } + + if (src_owned) + src_owned->destroyList(); + } + + if (dst_ghost) + dst_ghost->destroyList(); + + Pp = Pp->next; + } + + // Phase B: Inter-patch buffer exchange segments + MyList *dst_buffer = build_buffer_gsl(PatL); + for (int node = 0; node < cpusize; node++) + { + MyList *src_owned = build_owned_gsl(PatL, node, 5, Symmetry); + MyList *tsrc = 0, *tdst = 0; + 
build_gstl(src_owned, dst_buffer, &tsrc, &tdst); + + if (tsrc) + { + if (combined_src[node]) + combined_src[node]->catList(tsrc); + else + combined_src[node] = tsrc; + } + if (tdst) + { + if (combined_dst[node]) + combined_dst[node]->catList(tdst); + else + combined_dst[node] = tdst; + } + + if (src_owned) + src_owned->destroyList(); + } + if (dst_buffer) + dst_buffer->destroyList(); + + // Phase C: Single transfer + transfer(combined_src, combined_dst, VarList, VarList, Symmetry); + + // Phase D: Cleanup + for (int node = 0; node < cpusize; node++) + { + if (combined_src[node]) + combined_src[node]->destroyList(); + if (combined_dst[node]) + combined_dst[node]->destroyList(); + } + delete[] combined_src; + delete[] combined_dst; +} +// SyncCache constructor +Parallel::SyncCache::SyncCache() + : valid(false), cpusize(0), combined_src(0), combined_dst(0), + send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0), + send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0), + lengths_valid(false) +{ +} +// SyncCache invalidate: free grid segment lists but keep buffers +void Parallel::SyncCache::invalidate() +{ + if (!valid) + return; + for (int i = 0; i < cpusize; i++) + { + if (combined_src[i]) + combined_src[i]->destroyList(); + if (combined_dst[i]) + combined_dst[i]->destroyList(); + combined_src[i] = combined_dst[i] = 0; + send_lengths[i] = recv_lengths[i] = 0; + } + valid = false; + lengths_valid = false; +} +// SyncCache destroy: free everything +void Parallel::SyncCache::destroy() +{ + invalidate(); + if (combined_src) delete[] combined_src; + if (combined_dst) delete[] combined_dst; + if (send_lengths) delete[] send_lengths; + if (recv_lengths) delete[] recv_lengths; + if (send_buf_caps) delete[] send_buf_caps; + if (recv_buf_caps) delete[] recv_buf_caps; + for (int i = 0; i < cpusize; i++) + { + if (send_bufs && send_bufs[i]) delete[] send_bufs[i]; + if (recv_bufs && recv_bufs[i]) delete[] recv_bufs[i]; + } + if (send_bufs) delete[] send_bufs; 
+ if (recv_bufs) delete[] recv_bufs; + if (reqs) delete[] reqs; + if (stats) delete[] stats; + combined_src = combined_dst = 0; + send_lengths = recv_lengths = 0; + send_buf_caps = recv_buf_caps = 0; + send_bufs = recv_bufs = 0; + reqs = 0; stats = 0; + cpusize = 0; max_reqs = 0; +} +// transfer_cached: reuse pre-allocated buffers from SyncCache +void Parallel::transfer_cached(MyList **src, MyList **dst, + MyList *VarList1, MyList *VarList2, + int Symmetry, SyncCache &cache) +{ + int myrank; + MPI_Comm_size(MPI_COMM_WORLD, &cache.cpusize); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + int cpusize = cache.cpusize; + + int req_no = 0; + int node; + + for (node = 0; node < cpusize; node++) + { + if (node == myrank) + { + int length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + cache.recv_lengths[node] = length; + if (length > 0) + { + if (length > cache.recv_buf_caps[node]) + { + if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; + cache.recv_bufs[node] = new double[length]; + cache.recv_buf_caps[node] = length; + } + data_packer(cache.recv_bufs[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + } + } + else + { + // send + int slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + cache.send_lengths[node] = slength; + if (slength > 0) + { + if (slength > cache.send_buf_caps[node]) + { + if (cache.send_bufs[node]) delete[] cache.send_bufs[node]; + cache.send_bufs[node] = new double[slength]; + cache.send_buf_caps[node] = slength; + } + data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++); + } + // recv + int rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry); + cache.recv_lengths[node] = rlength; + if (rlength > 0) + { + if (rlength > 
cache.recv_buf_caps[node]) + { + if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; + cache.recv_bufs[node] = new double[rlength]; + cache.recv_buf_caps[node] = rlength; + } + MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++); + } + } + } + + MPI_Waitall(req_no, cache.reqs, cache.stats); + + for (node = 0; node < cpusize; node++) + if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0) + data_packer(cache.recv_bufs[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry); +} +// Sync_cached: build grid segment lists on first call, reuse on subsequent calls +void Parallel::Sync_cached(MyList *PatL, MyList *VarList, int Symmetry, SyncCache &cache) +{ + if (!cache.valid) + { + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + cache.cpusize = cpusize; + + // Allocate cache arrays if needed + if (!cache.combined_src) + { + cache.combined_src = new MyList *[cpusize]; + cache.combined_dst = new MyList *[cpusize]; + cache.send_lengths = new int[cpusize]; + cache.recv_lengths = new int[cpusize]; + cache.send_bufs = new double *[cpusize]; + cache.recv_bufs = new double *[cpusize]; + cache.send_buf_caps = new int[cpusize]; + cache.recv_buf_caps = new int[cpusize]; + for (int i = 0; i < cpusize; i++) + { + cache.send_bufs[i] = cache.recv_bufs[i] = 0; + cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; + } + cache.max_reqs = 2 * cpusize; + cache.reqs = new MPI_Request[cache.max_reqs]; + cache.stats = new MPI_Status[cache.max_reqs]; + } + + for (int node = 0; node < cpusize; node++) + { + cache.combined_src[node] = cache.combined_dst[node] = 0; + cache.send_lengths[node] = cache.recv_lengths[node] = 0; + } + + // Build intra-patch segments (same as Sync_merged Phase A) + MyList *Pp = PatL; + while (Pp) + { + Patch *Pat = Pp->data; + MyList *dst_ghost = build_ghost_gsl(Pat); + for (int node = 0; node < cpusize; node++) + { + MyList *src_owned = build_owned_gsl0(Pat, node); + MyList 
*tsrc = 0, *tdst = 0; + build_gstl(src_owned, dst_ghost, &tsrc, &tdst); + if (tsrc) + { + if (cache.combined_src[node]) + cache.combined_src[node]->catList(tsrc); + else + cache.combined_src[node] = tsrc; + } + if (tdst) + { + if (cache.combined_dst[node]) + cache.combined_dst[node]->catList(tdst); + else + cache.combined_dst[node] = tdst; + } + if (src_owned) src_owned->destroyList(); + } + if (dst_ghost) dst_ghost->destroyList(); + Pp = Pp->next; + } + + // Build inter-patch segments (same as Sync_merged Phase B) + MyList *dst_buffer = build_buffer_gsl(PatL); + for (int node = 0; node < cpusize; node++) + { + MyList *src_owned = build_owned_gsl(PatL, node, 5, Symmetry); + MyList *tsrc = 0, *tdst = 0; + build_gstl(src_owned, dst_buffer, &tsrc, &tdst); + if (tsrc) + { + if (cache.combined_src[node]) + cache.combined_src[node]->catList(tsrc); + else + cache.combined_src[node] = tsrc; + } + if (tdst) + { + if (cache.combined_dst[node]) + cache.combined_dst[node]->catList(tdst); + else + cache.combined_dst[node] = tdst; + } + if (src_owned) src_owned->destroyList(); + } + if (dst_buffer) dst_buffer->destroyList(); + + cache.valid = true; + } + + // Use cached lists with buffer-reusing transfer + transfer_cached(cache.combined_src, cache.combined_dst, VarList, VarList, Symmetry, cache); +} +// Sync_start: pack and post MPI_Isend/Irecv, return immediately +void Parallel::Sync_start(MyList *PatL, MyList *VarList, int Symmetry, + SyncCache &cache, AsyncSyncState &state) +{ + // Ensure cache is built + if (!cache.valid) + { + // Build cache (same logic as Sync_cached) + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + cache.cpusize = cpusize; + + if (!cache.combined_src) + { + cache.combined_src = new MyList *[cpusize]; + cache.combined_dst = new MyList *[cpusize]; + cache.send_lengths = new int[cpusize]; + cache.recv_lengths = new int[cpusize]; + cache.send_bufs = new double *[cpusize]; + cache.recv_bufs = new double *[cpusize]; + cache.send_buf_caps = new 
int[cpusize]; + cache.recv_buf_caps = new int[cpusize]; + for (int i = 0; i < cpusize; i++) + { + cache.send_bufs[i] = cache.recv_bufs[i] = 0; + cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; + } + cache.max_reqs = 2 * cpusize; + cache.reqs = new MPI_Request[cache.max_reqs]; + cache.stats = new MPI_Status[cache.max_reqs]; + } + + for (int node = 0; node < cpusize; node++) + { + cache.combined_src[node] = cache.combined_dst[node] = 0; + cache.send_lengths[node] = cache.recv_lengths[node] = 0; + } + + MyList *Pp = PatL; + while (Pp) + { + Patch *Pat = Pp->data; + MyList *dst_ghost = build_ghost_gsl(Pat); + for (int node = 0; node < cpusize; node++) + { + MyList *src_owned = build_owned_gsl0(Pat, node); + MyList *tsrc = 0, *tdst = 0; + build_gstl(src_owned, dst_ghost, &tsrc, &tdst); + if (tsrc) + { + if (cache.combined_src[node]) + cache.combined_src[node]->catList(tsrc); + else + cache.combined_src[node] = tsrc; + } + if (tdst) + { + if (cache.combined_dst[node]) + cache.combined_dst[node]->catList(tdst); + else + cache.combined_dst[node] = tdst; + } + if (src_owned) src_owned->destroyList(); + } + if (dst_ghost) dst_ghost->destroyList(); + Pp = Pp->next; + } + + MyList *dst_buffer = build_buffer_gsl(PatL); + for (int node = 0; node < cpusize; node++) + { + MyList *src_owned = build_owned_gsl(PatL, node, 5, Symmetry); + MyList *tsrc = 0, *tdst = 0; + build_gstl(src_owned, dst_buffer, &tsrc, &tdst); + if (tsrc) + { + if (cache.combined_src[node]) + cache.combined_src[node]->catList(tsrc); + else + cache.combined_src[node] = tsrc; + } + if (tdst) + { + if (cache.combined_dst[node]) + cache.combined_dst[node]->catList(tdst); + else + cache.combined_dst[node] = tdst; + } + if (src_owned) src_owned->destroyList(); + } + if (dst_buffer) dst_buffer->destroyList(); + cache.valid = true; + } + + // Now pack and post async MPI operations + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + int cpusize = cache.cpusize; + state.req_no = 0; + state.active = true; + + 
MyList **src = cache.combined_src; + MyList **dst = cache.combined_dst; + + for (int node = 0; node < cpusize; node++) + { + if (node == myrank) + { + int length; + if (!cache.lengths_valid) { + length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); + cache.recv_lengths[node] = length; + } else { + length = cache.recv_lengths[node]; + } + if (length > 0) + { + if (length > cache.recv_buf_caps[node]) + { + if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; + cache.recv_bufs[node] = new double[length]; + cache.recv_buf_caps[node] = length; + } + data_packer(cache.recv_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); + } + } + else + { + int slength; + if (!cache.lengths_valid) { + slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); + cache.send_lengths[node] = slength; + } else { + slength = cache.send_lengths[node]; + } + if (slength > 0) + { + if (slength > cache.send_buf_caps[node]) + { + if (cache.send_bufs[node]) delete[] cache.send_bufs[node]; + cache.send_bufs[node] = new double[slength]; + cache.send_buf_caps[node] = slength; + } + data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); + MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++); + } + int rlength; + if (!cache.lengths_valid) { + rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry); + cache.recv_lengths[node] = rlength; + } else { + rlength = cache.recv_lengths[node]; + } + if (rlength > 0) + { + if (rlength > cache.recv_buf_caps[node]) + { + if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; + cache.recv_bufs[node] = new double[rlength]; + cache.recv_buf_caps[node] = rlength; + } + MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++); + } + } + } + cache.lengths_valid = 
true; +} +// Sync_finish: wait for async MPI operations and unpack +void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state, + MyList *VarList, int Symmetry) +{ + if (!state.active) + return; + + MPI_Waitall(state.req_no, cache.reqs, cache.stats); + + int cpusize = cache.cpusize; + MyList **src = cache.combined_src; + MyList **dst = cache.combined_dst; + + for (int node = 0; node < cpusize; node++) + if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0) + data_packer(cache.recv_bufs[node], src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry); + + state.active = false; +} +// collect buffer grid segments or blocks for the periodic boundary condition of given patch +// --------------------------------------------------- +// |con | |con | +// |ner | PhysBD |ner | +// |-------------------------------------------------| +// | | | | +// |Phy | |Phy | +// |sBD | |BD | +// | | | | +// | | | | +// | | | | +// |-------------------------------------------------| +// |con | PhysBD |con | +// |ner | |ner | +// --------------------------------------------------- +// first order derivetive does not need conner information, +// but second order derivative needs! 
+/* the following code does not include conner part +MyList *Parallel::build_PhysBD_gsl(Patch *Pat) +{ + MyList *cgsl,*gsc,*gsb=0,*p; + gsc = build_ghost_gsl(Pat); + for(int i=0;idata->Bg->getdX(i); +// lower boundary + if(gsb) + { + p = new MyList; + p->data = new Parallel::gridseg; + p->next=gsb; + gsb=p; + } + else + { + gsb = new MyList; + gsb->data = new Parallel::gridseg; + gsb->next=0; + } + for(int j=0;jdata->llb[i] = Pat->bbox[i]-ghost_width*DH; + gsb->data->uub[i] = Pat->bbox[i]-DH; +#else +#ifdef Cell + gsb->data->llb[i] = Pat->bbox[i]-ghost_width*DH; + gsb->data->uub[i] = Pat->bbox[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + gsb->data->shape[i] = ghost_width; + } + else + { + gsb->data->llb[j] = Pat->bbox[j]; + gsb->data->uub[j] = Pat->bbox[j+dim]; + gsb->data->shape[j] = Pat->shape[j]; + } + } + gsb->data->Bg = 0; //vertual grid segment +// upper boundary + p = new MyList; + p->data = new Parallel::gridseg; + p->next=gsb; + gsb=p; + for(int j=0;jdata->llb[i] = Pat->bbox[i+dim]+DH; + gsb->data->uub[i] = Pat->bbox[i+dim]+ghost_width*DH; +#else +#ifdef Cell + gsb->data->llb[i] = Pat->bbox[i+dim]; + gsb->data->uub[i] = Pat->bbox[i+dim]+ghost_width*DH; +#else +#error Not define Vertex nor Cell +#endif +#endif + gsb->data->shape[i] = ghost_width; + } + else + { + gsb->data->llb[j] = Pat->bbox[j]; + gsb->data->uub[j] = Pat->bbox[j+dim]; + gsb->data->shape[j] = Pat->shape[j]; + } + } + gsb->data->Bg = 0; //vertual grid segment + } + + cgsl = gsl_and(gsc,gsb); + + gsc->destroyList(); + gsb->destroyList(); + + return cgsl; +} +*/ +// the following code includes conner part +MyList *Parallel::build_PhysBD_gsl(Patch *Pat) +{ + MyList *cgsl, *gsc, *gsb = 0, *p; + + gsc = build_complete_gsl(Pat); + + gsb = new MyList; + gsb->data = new Parallel::gridseg; + gsb->next = 0; + gsb->data->Bg = 0; + + for (int j = 0; j < dim; j++) + { + gsb->data->llb[j] = Pat->bbox[j]; + gsb->data->uub[j] = Pat->bbox[j + dim]; + gsb->data->shape[j] = Pat->shape[j]; + 
} + + p = gsl_subtract(gsc, gsb); + + gsc->destroyList(); + gsb->destroyList(); + + cgsl = divide_gsl(p, Pat); + + p->destroyList(); + + return cgsl; +} +MyList *Parallel::divide_gsl(MyList *p, Patch *Pat) +{ + MyList *cgsl = 0; + while (p) + { + if (cgsl) + cgsl->catList(divide_gs(p, Pat)); + else + cgsl = divide_gs(p, Pat); + p = p->next; + } + + return cgsl; +} +// divide the gs into pices which locate either totally outside of the given Patch coordinate range +// or totally inside it. It's usefull for periodic boundary condition +MyList *Parallel::divide_gs(MyList *p, Patch *Pat) +{ + double DH[dim]; + for (int i = 0; i < dim; i++) + { + DH[i] = p->data->Bg->getdX(i); + } + + int num[dim]; + double llb[3][dim], uub[3][dim]; + for (int i = 0; i < dim; i++) + { + if (p->data->llb[i] < Pat->bbox[i] - DH[i] / 2) + { + if (p->data->uub[i] > Pat->bbox[i + dim] + DH[i] / 2) + { + num[i] = 3; + llb[0][i] = p->data->llb[i]; + llb[1][i] = Pat->bbox[i]; + uub[1][i] = Pat->bbox[i + dim]; + uub[2][i] = p->data->uub[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + uub[0][i] = Pat->bbox[i] - DH[i]; + llb[2][i] = Pat->bbox[i + dim] + DH[i]; +#else +#ifdef Cell + uub[0][i] = Pat->bbox[i]; + llb[2][i] = Pat->bbox[i + dim]; +#else +#error Not define Vertex nor Cell +#endif +#endif + } + else if (p->data->uub[i] > Pat->bbox[i] + DH[i] / 2) + { + num[i] = 2; + llb[0][i] = p->data->llb[i]; + llb[1][i] = Pat->bbox[i]; + uub[1][i] = p->data->uub[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + uub[0][i] = Pat->bbox[i] - DH[i]; +#else +#ifdef Cell + uub[0][i] = Pat->bbox[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + } + else + { + num[i] = 1; + llb[0][i] = p->data->llb[i]; + uub[0][i] = p->data->uub[i]; + } + } + else if (p->data->llb[i] < Pat->bbox[i + dim] - DH[i] / 2) + { + if (p->data->uub[i] > Pat->bbox[i + dim] + DH[i] / 2) + { + num[i] = 2; + llb[0][i] = p->data->llb[i]; + uub[0][i] = 
Pat->bbox[i + dim]; + uub[1][i] = p->data->uub[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + llb[1][i] = Pat->bbox[i + dim] + DH[i]; +#else +#ifdef Cell + llb[1][i] = Pat->bbox[i + dim]; +#else +#error Not define Vertex nor Cell +#endif +#endif + } + else + { + num[i] = 1; + llb[0][i] = p->data->llb[i]; + uub[0][i] = p->data->uub[i]; + } + } + else + { + num[i] = 1; + llb[0][i] = p->data->llb[i]; + uub[0][i] = p->data->uub[i]; + } + } + MyList *cgsl = 0, *gg; + int NN = 1; + for (int i = 0; i < dim; i++) + NN = NN * num[i]; + + for (int i = 0; i < NN; i++) + { + int ind[dim]; + getarrayindex(dim, num, ind, i); + gg = clone_gsl(p, true); + for (int k = 0; k < dim; k++) + { + gg->data->llb[k] = llb[ind[k]][k]; + gg->data->uub[k] = uub[ind[k]][k]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gg->data->shape[k] = int((uub[ind[k]][k] - llb[ind[k]][k]) / DH[k] + 0.4) + 1; +#else +#ifdef Cell + gg->data->shape[k] = int((uub[ind[k]][k] - llb[ind[k]][k]) / DH[k] + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + + if (cgsl) + cgsl->catList(gg); + else + cgsl = gg; + } + + return cgsl; +} +// after mod operation, according to overlape to determine real grid segments +void Parallel::build_PhysBD_gstl(Patch *Pat, MyList *srci, MyList *dsti, + MyList **out_src, MyList **out_dst) +{ + *out_src = *out_dst = 0; + + if (!srci || !dsti) + return; + + MyList *s, *d; + MyList *s2, *d2; + + double llb[dim], uub[dim]; + + s = srci; + while (s) + { + Parallel::gridseg *sd = s->data; + d = dsti; + while (d) + { + Parallel::gridseg *dd = d->data; + bool flag = true; + for (int i = 0; i < dim; i++) + { + double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i); + if (!feq(SH, DH, SH / 2)) + { + cout << "Parallel::build_PhysBD_gstl meets different grid space SH = " << SH << ", DH = " << DH << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + // we assume dst and src locate on the same Patch + if (dd->llb[i] < 
Pat->bbox[i]) + llb[i] = Mymax(sd->llb[i], dd->llb[i] + Pat->bbox[dim + i] - Pat->bbox[i]); + else if (dd->llb[i] > Pat->bbox[i + dim]) + llb[i] = Mymax(sd->llb[i], dd->llb[i] - Pat->bbox[dim + i] + Pat->bbox[i]); + else + llb[i] = Mymax(sd->llb[i], dd->llb[i]); + + if (dd->uub[i] < Pat->bbox[i]) + uub[i] = Mymin(sd->uub[i], dd->uub[i] + Pat->bbox[dim + i] - Pat->bbox[i]); + else if (dd->uub[i] > Pat->bbox[dim + i]) + uub[i] = Mymin(sd->uub[i], dd->uub[i] - Pat->bbox[dim + i] + Pat->bbox[i]); + else + uub[i] = Mymin(sd->uub[i], dd->uub[i]); +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + if (llb[i] > uub[i] + SH / 2) + { + flag = false; + break; + } // special for isolated point +#else +#ifdef Cell + if (llb[i] > uub[i]) + { + flag = false; + break; + } +#else +#error Not define Vertex nor Cell +#endif +#endif + } + + if (flag) + { + if (!(*out_src)) + { + *out_src = s2 = new MyList; + *out_dst = d2 = new MyList; + s2->data = new Parallel::gridseg; + d2->data = new Parallel::gridseg; + } + else + { + s2->next = new MyList; + s2 = s2->next; + d2->next = new MyList; + d2 = d2->next; + s2->data = new Parallel::gridseg; + d2->data = new Parallel::gridseg; + } + + for (int i = 0; i < dim; i++) + { + double SH = sd->Bg->getdX(i), DH = dd->Bg->getdX(i); + s2->data->llb[i] = llb[i]; + s2->data->uub[i] = uub[i]; + + if (dd->llb[i] < Pat->bbox[i]) + d2->data->llb[i] = llb[i] - Pat->bbox[dim + i] + Pat->bbox[i]; + else if (dd->llb[i] > Pat->bbox[i + dim]) + d2->data->llb[i] = llb[i] + Pat->bbox[dim + i] - Pat->bbox[i]; + else + d2->data->llb[i] = llb[i]; + + if (dd->uub[i] < Pat->bbox[i]) + d2->data->uub[i] = uub[i] - Pat->bbox[dim + i] + Pat->bbox[i]; + else if (dd->uub[i] > Pat->bbox[dim + i]) + d2->data->uub[i] = uub[i] + Pat->bbox[dim + i] - Pat->bbox[i]; + else + d2->data->uub[i] = uub[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4) 
+ 1; + d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + s2->data->shape[i] = int((s2->data->uub[i] - s2->data->llb[i]) / SH + 0.4); + d2->data->shape[i] = int((d2->data->uub[i] - d2->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + s2->data->Bg = sd->Bg; + s2->next = 0; + d2->data->Bg = dd->Bg; + d2->next = 0; + } + d = d->next; + } + s = s->next; + } +} +void Parallel::PeriodicBD(Patch *Pat, MyList *VarList, int Symmetry) +{ + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_PhysBD_gsl(Pat); + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl0(Pat, node); // for the part without ghost points and do not extend + build_PhysBD_gstl(Pat, src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; +} +double Parallel::L2Norm(Patch *Pat, var *vf) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + double tvf, dtvf = 0; + int BDW = ghost_width; + + MyList *BP = Pat->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_l2normhelper(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pat->bbox[0], Pat->bbox[1], Pat->bbox[2], + Pat->bbox[3], Pat->bbox[4], Pat->bbox[5], + cg->fgfs[vf->sgfn], tvf, BDW); + dtvf += tvf; + } + if (BP == Pat->ble) + break; + BP = BP->next; + } + + 
MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + tvf = sqrt(tvf); + + return tvf; +} +double Parallel::L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + double tvf, dtvf = 0; + int BDW = ghost_width; + + MyList *BP = Pat->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_l2normhelper(cg->shape, cg->X[0], cg->X[1], cg->X[2], + Pat->bbox[0], Pat->bbox[1], Pat->bbox[2], + Pat->bbox[3], Pat->bbox[4], Pat->bbox[5], + cg->fgfs[vf->sgfn], tvf, BDW); + dtvf += tvf; + } + if (BP == Pat->ble) + break; + BP = BP->next; + } + + MPI_Allreduce(&dtvf, &tvf, 1, MPI_DOUBLE, MPI_SUM, Comm_here); + + tvf = sqrt(tvf); + + return tvf; +} +void Parallel::checkgsl(MyList *pp, bool first_only) +{ + int myrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) + { + if (!pp) + cout << " Parallel::checkgsl meets empty gsl" << endl; + while (pp) + { + if (pp->data->Bg) + cout << " on node#" << pp->data->Bg->rank << endl; + else + cout << " virtual grid segment" << endl; + cout << " shape: ("; + for (int i = 0; i < dim; i++) + { + if (i < dim - 1) + cout << pp->data->shape[i] << ","; + else + cout << pp->data->shape[i] << ")" << endl; + } + cout << " range: ("; + for (int i = 0; i < dim; i++) + { + if (i < dim - 1) + cout << pp->data->llb[i] << ":" << pp->data->uub[i] << ","; + else + cout << pp->data->llb[i] << ":" << pp->data->uub[i] << ")" << endl; + } + if (first_only) + return; + pp = pp->next; + } + } +} +void Parallel::checkvarl(MyList *pp, bool first_only) +{ + int myrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) + { + while (pp) + { + cout << "name: " << pp->data->name << endl; + cout << "SoA = (" << pp->data->SoA[0] << "," << pp->data->SoA[1] << "," << pp->data->SoA[2] << ")" << endl; + cout << "sgfn = " << pp->data->sgfn << endl; + if (first_only) + return; + pp = pp->next; + } + } +} +void Parallel::prepare_inter_time_level(MyList 
*PatL, + MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, + MyList *VarList3 /* target (t+a*dt) */, int tindex) +{ + while (PatL) + { + prepare_inter_time_level(PatL->data, VarList1, VarList2, VarList3, tindex); + PatL = PatL->next; + } +} +void Parallel::prepare_inter_time_level(Patch *Pat, + MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, + MyList *VarList3 /* target (t+a*dt) */, int tindex) +{ + int myrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + MyList *varl1; + MyList *varl2; + MyList *varl3; + + MyList *BP = Pat->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + varl1 = VarList1; + varl2 = VarList2; + varl3 = VarList3; + while (varl1) + { + if (tindex == 0) + f_average(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], cg->fgfs[varl3->data->sgfn]); + else if (tindex == 1) + f_average3(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], cg->fgfs[varl3->data->sgfn]); + else if (tindex == -1) + // just change data order to use average3 + f_average3(cg->shape, cg->fgfs[varl2->data->sgfn], cg->fgfs[varl1->data->sgfn], cg->fgfs[varl3->data->sgfn]); + else + { + cout << "error tindex in Parallel::prepare_inter_time_level" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + varl1 = varl1->next; + varl2 = varl2->next; + varl3 = varl3->next; + } + } + if (BP == Pat->ble) + break; + BP = BP->next; + } +} +void Parallel::prepare_inter_time_level(MyList *PatL, + MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, + MyList *VarList3 /* source (t-dt) */, MyList *VarList4 /* target (t+a*dt) */, int tindex) +{ + while (PatL) + { + prepare_inter_time_level(PatL->data, VarList1, VarList2, VarList3, VarList4, tindex); + PatL = PatL->next; + } +} +void Parallel::prepare_inter_time_level(Patch *Pat, + MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, + MyList *VarList3 /* source (t-dt) */, MyList *VarList4 /* target 
(t+a*dt) */, int tindex) +{ + int myrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + MyList *varl1; + MyList *varl2; + MyList *varl3; + MyList *varl4; + + MyList *BP = Pat->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + varl1 = VarList1; + varl2 = VarList2; + varl3 = VarList3; + varl4 = VarList4; + while (varl1) + { + if (tindex == 0) + f_average2(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], + cg->fgfs[varl3->data->sgfn], cg->fgfs[varl4->data->sgfn]); + else if (tindex == 1) + f_average2p(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], + cg->fgfs[varl3->data->sgfn], cg->fgfs[varl4->data->sgfn]); + else if (tindex == -1) + f_average2m(cg->shape, cg->fgfs[varl1->data->sgfn], cg->fgfs[varl2->data->sgfn], + cg->fgfs[varl3->data->sgfn], cg->fgfs[varl4->data->sgfn]); + else + { + cout << "error tindex in long cgh::prepare_inter_time_level" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + varl1 = varl1->next; + varl2 = varl2->next; + varl3 = varl3->next; + varl4 = varl4->next; + } + } + if (BP == Pat->ble) + break; + BP = BP->next; + } +} +void Parallel::Prolong(Patch *Patc, Patch *Patf, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry) +{ + if (Patc->lev >= Patf->lev) + { + cout << "Parallel::Prolong: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_complete_gsl(Patf); // including ghost + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl4(Patc, node, Symmetry); // - buffer - ghost - BD ghost + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + 
transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; +} +void Parallel::Restrict(MyList *PatcL, MyList *PatfL, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry) +{ + if (PatcL->data->lev >= PatfL->data->lev) + { + cout << "Parallel::Restrict: meet requst of Restrict from lev#" << PatfL->data->lev << " to lev#" << PatcL->data->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_complete_gsl(PatcL); // including ghost + for (int node = 0; node < cpusize; node++) + { +#if 0 +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + src[node]=build_owned_gsl(PatfL,node,2,Symmetry); // - buffer - ghost +#else +#ifdef Cell + src[node]=build_owned_gsl(PatfL,node,4,Symmetry); // - buffer - ghost - BD ghost +#else +#error Not define Vertex nor Cell +#endif +#endif +#else + // it seems bam always use this + src[node] = build_owned_gsl(PatfL, node, 2, Symmetry); // - buffer - ghost +#endif + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + delete[] 
src; + delete[] transfer_src; + delete[] transfer_dst; +} +void Parallel::Restrict_after(MyList *PatcL, MyList *PatfL, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry) +{ + if (PatcL->data->lev >= PatfL->data->lev) + { + cout << "Parallel::Restrict: meet requst of Restrict from lev#" << PatfL->data->lev << " to lev#" << PatcL->data->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_complete_gsl(PatcL); // including ghost + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl(PatfL, node, 3, Symmetry); // - ghost - BD ghost + + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; +} +// for the same time level +void Parallel::OutBdLow2Hi(Patch *Patc, Patch *Patf, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry) +{ + if (Patc->lev >= Patf->lev) + { + cout << "Parallel::OutBdLow2Hi: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_buffer_gsl(Patf); // buffer region 
only + + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl4(Patc, node, Symmetry); // - buffer - ghost - BD ghost + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; +} +void Parallel::OutBdLow2Hi(MyList *PatcL, MyList *PatfL, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry) +{ + MyList *Pp, *Ppc; + Ppc = PatcL; + while (Ppc) + { + Pp = PatfL; + while (Pp) + { + if (Ppc->data->lev >= Pp->data->lev) + { + cout << "Parallel::OutBdLow2Hi(list): meet requst of Prolong from lev#" << Ppc->data->lev << " to lev#" << Pp->data->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + Pp = Pp->next; + } + Ppc = Ppc->next; + } + + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_buffer_gsl(PatfL); // buffer region only + + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl(PatcL, node, 4, Symmetry); // - buffer - ghost - BD ghost + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + transfer(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + 
transfer_dst[node]->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; +} +// for the same time level +void Parallel::OutBdLow2Himix(Patch *Patc, Patch *Patf, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry) +{ + if (Patc->lev >= Patf->lev) + { + cout << "Parallel::OutBdLow2Himix: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_buffer_gsl(Patf); // buffer region only + + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl4(Patc, node, Symmetry); // - buffer - ghost - BD ghost + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + transfermix(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; + + // do not need this, we have done after calling of this routine in ProlongRestrict or RestrictProlong + // Sync(Patf,VarList2,Symmetry); // fine level points may be not enough for interpolation +} +void Parallel::OutBdLow2Himix(MyList *PatcL, MyList *PatfL, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry) +{ + MyList *Pp, *Ppc; + Ppc = PatcL; + while (Ppc) + { + Pp = PatfL; + while (Pp) + { + if (Ppc->data->lev >= Pp->data->lev) + { + cout << "Parallel::OutBdLow2Himix(list): meet requst of Prolong from lev#" << Ppc->data->lev << " to lev#" 
<< Pp->data->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + Pp = Pp->next; + } + Ppc = Ppc->next; + } + + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_buffer_gsl(PatfL); // buffer region only + + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl(PatcL, node, 4, Symmetry); // - buffer - ghost - BD ghost + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + transfermix(transfer_src, transfer_dst, VarList1, VarList2, Symmetry); + + if (dst) + dst->destroyList(); + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; +} + +// Restrict_cached: cache grid segment lists, reuse buffers via transfer_cached +void Parallel::Restrict_cached(MyList *PatcL, MyList *PatfL, + MyList *VarList1, MyList *VarList2, + int Symmetry, SyncCache &cache) +{ + if (!cache.valid) + { + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + cache.cpusize = cpusize; + + if (!cache.combined_src) + { + cache.combined_src = new MyList *[cpusize]; + cache.combined_dst = new MyList *[cpusize]; + cache.send_lengths = new int[cpusize]; + cache.recv_lengths = new int[cpusize]; + cache.send_bufs = new double *[cpusize]; + cache.recv_bufs = new double *[cpusize]; + cache.send_buf_caps = new int[cpusize]; + cache.recv_buf_caps = new int[cpusize]; + for (int i = 0; i < cpusize; i++) + { + cache.send_bufs[i] = cache.recv_bufs[i] = 0; + cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; + } + cache.max_reqs = 2 * cpusize; + cache.reqs = new MPI_Request[cache.max_reqs]; + 
cache.stats = new MPI_Status[cache.max_reqs]; + } + + MyList *dst = build_complete_gsl(PatcL); + for (int node = 0; node < cpusize; node++) + { + MyList *src_owned = build_owned_gsl(PatfL, node, 2, Symmetry); + build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]); + if (src_owned) src_owned->destroyList(); + } + if (dst) dst->destroyList(); + + cache.valid = true; + } + + transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache); +} + +// OutBdLow2Hi_cached: cache grid segment lists, reuse buffers via transfer_cached +void Parallel::OutBdLow2Hi_cached(MyList *PatcL, MyList *PatfL, + MyList *VarList1, MyList *VarList2, + int Symmetry, SyncCache &cache) +{ + if (!cache.valid) + { + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + cache.cpusize = cpusize; + + if (!cache.combined_src) + { + cache.combined_src = new MyList *[cpusize]; + cache.combined_dst = new MyList *[cpusize]; + cache.send_lengths = new int[cpusize]; + cache.recv_lengths = new int[cpusize]; + cache.send_bufs = new double *[cpusize]; + cache.recv_bufs = new double *[cpusize]; + cache.send_buf_caps = new int[cpusize]; + cache.recv_buf_caps = new int[cpusize]; + for (int i = 0; i < cpusize; i++) + { + cache.send_bufs[i] = cache.recv_bufs[i] = 0; + cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; + } + cache.max_reqs = 2 * cpusize; + cache.reqs = new MPI_Request[cache.max_reqs]; + cache.stats = new MPI_Status[cache.max_reqs]; + } + + MyList *dst = build_buffer_gsl(PatfL); + for (int node = 0; node < cpusize; node++) + { + MyList *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry); + build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]); + if (src_owned) src_owned->destroyList(); + } + if (dst) dst->destroyList(); + + cache.valid = true; + } + + transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache); +} + +// OutBdLow2Himix_cached: same as OutBdLow2Hi_cached but 
uses transfermix for unpacking +void Parallel::OutBdLow2Himix_cached(MyList *PatcL, MyList *PatfL, + MyList *VarList1, MyList *VarList2, + int Symmetry, SyncCache &cache) +{ + if (!cache.valid) + { + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + cache.cpusize = cpusize; + + if (!cache.combined_src) + { + cache.combined_src = new MyList *[cpusize]; + cache.combined_dst = new MyList *[cpusize]; + cache.send_lengths = new int[cpusize]; + cache.recv_lengths = new int[cpusize]; + cache.send_bufs = new double *[cpusize]; + cache.recv_bufs = new double *[cpusize]; + cache.send_buf_caps = new int[cpusize]; + cache.recv_buf_caps = new int[cpusize]; + for (int i = 0; i < cpusize; i++) + { + cache.send_bufs[i] = cache.recv_bufs[i] = 0; + cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0; + } + cache.max_reqs = 2 * cpusize; + cache.reqs = new MPI_Request[cache.max_reqs]; + cache.stats = new MPI_Status[cache.max_reqs]; + } + + MyList *dst = build_buffer_gsl(PatfL); + for (int node = 0; node < cpusize; node++) + { + MyList *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry); + build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]); + if (src_owned) src_owned->destroyList(); + } + if (dst) dst->destroyList(); + + cache.valid = true; + } + + // Use transfermix instead of transfer for mix-mode interpolation + int myrank; + MPI_Comm_size(MPI_COMM_WORLD, &cache.cpusize); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + int cpusize = cache.cpusize; + + int req_no = 0; + for (int node = 0; node < cpusize; node++) + { + if (node == myrank) + { + int length = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + cache.recv_lengths[node] = length; + if (length > 0) + { + if (length > cache.recv_buf_caps[node]) + { + if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; + cache.recv_bufs[node] = new double[length]; + cache.recv_buf_caps[node] = length; + } + 
data_packermix(cache.recv_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + } + } + else + { + int slength = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + cache.send_lengths[node] = slength; + if (slength > 0) + { + if (slength > cache.send_buf_caps[node]) + { + if (cache.send_bufs[node]) delete[] cache.send_bufs[node]; + cache.send_bufs[node] = new double[slength]; + cache.send_buf_caps[node] = slength; + } + data_packermix(cache.send_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry); + MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++); + } + int rlength = data_packermix(0, cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry); + cache.recv_lengths[node] = rlength; + if (rlength > 0) + { + if (rlength > cache.recv_buf_caps[node]) + { + if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; + cache.recv_bufs[node] = new double[rlength]; + cache.recv_buf_caps[node] = rlength; + } + MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++); + } + } + } + + MPI_Waitall(req_no, cache.reqs, cache.stats); + + for (int node = 0; node < cpusize; node++) + if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0) + data_packermix(cache.recv_bufs[node], cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry); +} + +// collect all buffer grid segments or blocks for given patch +MyList *Parallel::build_buffer_gsl(Patch *Pat) +{ + MyList *cgsl, *gsc, *gsb; + + gsc = build_complete_gsl(Pat); // including ghost + + gsb = new MyList; + gsb->data = new Parallel::gridseg; + + for (int i = 0; i < dim; i++) + { + double DH = Pat->blb->data->getdX(i); + gsb->data->uub[i] = Pat->bbox[dim + i] - 
Pat->uui[i] * DH; + gsb->data->llb[i] = Pat->bbox[i] + Pat->lli[i] * DH; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + gsb->data->shape[i] = int((gsb->data->uub[i] - gsb->data->llb[i]) / DH + 0.4) + 1; +#else +#ifdef Cell + gsb->data->shape[i] = int((gsb->data->uub[i] - gsb->data->llb[i]) / DH + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + gsb->data->Bg = 0; + gsb->next = 0; + + cgsl = gsl_subtract(gsc, gsb); + + gsc->destroyList(); + gsb->destroyList(); + + // set illb and iuub + gsb = cgsl; + while (gsb) + { + for (int i = 0; i < dim; i++) + { + double DH = Pat->blb->data->getdX(i); + gsb->data->iuub[i] = Pat->bbox[dim + i] - Pat->uui[i] * DH; + gsb->data->illb[i] = Pat->bbox[i] + Pat->lli[i] * DH; + } + gsb = gsb->next; + } + + return cgsl; +} +MyList *Parallel::build_buffer_gsl(MyList *PatL) +{ + MyList *cgsl = 0, *gs; + while (PatL) + { + if (cgsl) + { + gs->next = build_buffer_gsl(PatL->data); + gs = gs->next; + if (gs) + while (gs->next) + gs = gs->next; + } + else + { + cgsl = build_buffer_gsl(PatL->data); + gs = cgsl; + if (gs) + while (gs->next) + gs = gs->next; + } + PatL = PatL->next; + } + + return cgsl; +} +void Parallel::Prolongint(Patch *Patc, Patch *Patf, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry) +{ + if (Patc->lev >= Patf->lev) + { + cout << "Parallel::Prolong: meet requst of Prolong from lev#" << Patc->lev << " to lev#" << Patf->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + int myrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int num_var = 0; + MyList *varl; + varl = VarList1; + while (varl) + { + num_var++; + varl = varl->next; + } + + MyList *BP = Patf->blb; + while (BP) + { + int Npts; + if (myrank == BP->data->rank) + Npts = BP->data->shape[0] * BP->data->shape[1] * BP->data->shape[2]; + MPI_Bcast(&Npts, 1, MPI_INT, BP->data->rank, MPI_COMM_WORLD); + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[Npts]; + if 
(myrank == BP->data->rank) + { + for (int i = 0; i < Npts; i++) + { + int ind[3]; + Parallel::getarrayindex(3, BP->data->shape, ind, i); + pox[0][i] = BP->data->X[0][ind[0]]; + pox[1][i] = BP->data->X[1][ind[1]]; + pox[2][i] = BP->data->X[2][ind[2]]; + } + } + for (int i = 0; i < 3; i++) + MPI_Bcast(pox[i], Npts, MPI_DOUBLE, BP->data->rank, MPI_COMM_WORLD); + double *res; + res = new double[num_var * Npts]; + Patc->Interp_Points(VarList1, Npts, pox, res, Symmetry); // because this operation is a global operation (for all processors) + // we have to isolate it out of myrank==BP->data->rank + if (myrank == BP->data->rank) + { + for (int i = 0; i < Npts; i++) + { + varl = VarList2; + int j = 0; + while (varl) + { + (BP->data->fgfs[varl->data->sgfn])[i] = res[j + i * num_var]; + j++; + varl = varl->next; + } + } + } + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] res; + BP = BP->next; + } +} +// +void Parallel::merge_gsl(MyList *&A, const double ratio) +{ + if (!A) + return; + + MyList *B, *C, *D = A; + bool flag = false; + while (D->next) + { + B = D->next; + while (B) + { + flag = merge_gs(D, B, C, ratio); + if (flag) + break; + B = B->next; + } + if (flag) + break; + D = D->next; + } + + if (flag) + { + // delete D and B from A + MyList *E = A; + while (E->next) + { + MyList *tp = E->next; + if (D == tp || B == tp) + { + E->next = (tp->next) ? tp->next : 0; + delete tp->data; + delete tp; + } + if (E->next) + E = E->next; + } + + if (D == A) + { + MyList *tp = A; + A = (A->next) ? 
A->next : 0; + delete tp->data; + delete tp; + } + // cat C to A + if (A) + A->catList(C); + else + A = C; + + merge_gsl(A, ratio); + } +} +// +bool Parallel::merge_gs(MyList *D, MyList *B, MyList *&C, const double ratio) +{ + if (!B || !D) + return false; + + C = 0; + double llb[dim], uub[dim], DH[dim]; + for (int i = 0; i < dim; i++) + { + double tdh; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH[i] = (D->data->uub[i] - D->data->llb[i]) / (D->data->shape[i] - 1); + tdh = (B->data->uub[i] - B->data->llb[i]) / (B->data->shape[i] - 1); +#else +#ifdef Cell + DH[i] = (D->data->uub[i] - D->data->llb[i]) / D->data->shape[i]; + tdh = (B->data->uub[i] - B->data->llb[i]) / B->data->shape[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (!feq(DH[i], tdh, DH[i] / 2)) + { + cout << "Parallel::merge_gs meets different grid segment " << DH[i] << " vs " << tdh << endl; + checkgsl(B, true); + checkgsl(D, true); + MPI_Abort(MPI_COMM_WORLD, 1); + } + llb[i] = Mymax(D->data->llb[i], B->data->llb[i]); + uub[i] = Mymin(D->data->uub[i], B->data->uub[i]); + // if(uub[i]-llb[i] < DH[i]/2) return false; //here this is valid for both vertex and cell + + // use 0 instead of DH[i]/2, we consider contact case, 2012 Aug 8 + if (uub[i] - llb[i] < 0) + return false; // here this is valid for both vertex and cell + } + + // vb: volume of B + // vd: volume of D + // vo: volume of overlap + // vt: volume of smallest common box (virtual merged box) + double vd = 1, vb = 1, vt = 1, vo = 1; + for (int i = 0; i < dim; i++) + { + vt = vt * (Mymax(D->data->uub[i], B->data->uub[i]) - Mymin(D->data->llb[i], B->data->llb[i])); + vo = vo * (uub[i] - llb[i]); + vd = vd * (D->data->uub[i] - D->data->llb[i]); + vb = vb * (B->data->uub[i] - B->data->llb[i]); + } + + // smller ratio, more possible to merge + if ((vd + vb - vo) / vt > ratio) + { + C = new MyList; + C->data = new gridseg; + for (int i = 0; i < dim; i++) + { + C->data->uub[i] = 
Mymax(D->data->uub[i], B->data->uub[i]); + C->data->llb[i] = Mymin(D->data->llb[i], B->data->llb[i]); +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4) + 1; +#else +#ifdef Cell + C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + if (D->data->Bg == B->data->Bg) + C->data->Bg = D->data->Bg; + else + C->data->Bg = 0; + + C->next = 0; + + return true; + } + else + { + return false; + } +} +// Add ghost region to tangent plane +// we assume the grids have the same resolution +void Parallel::add_ghost_touch(MyList *&A) +{ + if (!A || !(A->next)) + return; + + double DH[dim]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + for (int i = 0; i < dim; i++) + DH[i] = (A->data->uub[i] - A->data->llb[i]) / (A->data->shape[i] - 1) / 2; +#else +#ifdef Cell + for (int i = 0; i < dim; i++) + DH[i] = (A->data->uub[i] - A->data->llb[i]) / A->data->shape[i] / 2; +#else +#error Not define Vertex nor Cell +#endif +#endif + + MyList *C1, *C2, *A1 = A, *A2, *dc; + dc = C1 = clone_gsl(A, false); + while (C1) + { + C2 = C1->next; + A2 = A1->next; + while (C2) + { + for (int i = 0; i < dim; i++) + { + if (feq(C1->data->llb[i], C2->data->uub[i], DH[i])) + { + // direction i touch, other directions overlap + bool flag = true; + for (int j = 0; j < i; j++) + if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 && + (C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0) + flag = false; + for (int j = i + 1; j < dim; j++) + if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 && + (C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0) + flag = false; + + if (flag) + { + // only add one ghost region + if (feq(A1->data->llb[i], C1->data->llb[i], DH[i])) + { + 
A1->data->llb[i] -= ghost_width * 2 * DH[i]; + A1->data->shape[i] += ghost_width; + } + if (feq(A2->data->uub[i], C2->data->uub[i], DH[i])) + { + A2->data->uub[i] += ghost_width * 2 * DH[i]; + A2->data->shape[i] += ghost_width; + } + } + } + if (feq(C1->data->uub[i], C2->data->llb[i], DH[i])) + { + // direction i touch, other directions overlap + bool flag = true; + for (int j = 0; j < i; j++) + if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 && + (C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0) + flag = false; + for (int j = i + 1; j < dim; j++) + if ((C1->data->llb[j] - C2->data->llb[j]) * (C1->data->uub[j] - C2->data->llb[j]) > 0 && + (C2->data->llb[j] - C1->data->llb[j]) * (C2->data->uub[j] - C1->data->llb[j]) > 0) + flag = false; + + if (flag) + { + // only add one ghost region + if (feq(A1->data->uub[i], C1->data->uub[i], DH[i])) + { + A1->data->uub[i] += ghost_width * 2 * DH[i]; + A1->data->shape[i] += ghost_width; + } + if (feq(A2->data->llb[i], C2->data->llb[i], DH[i])) + { + A2->data->llb[i] -= ghost_width * 2 * DH[i]; + A2->data->shape[i] += ghost_width; + } + } + } + } + C2 = C2->next; + A2 = A2->next; + } + C1 = C1->next; + A1 = A1->next; + } + + if (dc) + dc->destroyList(); +} +// According to overlap to cut the gsl into recular pices +void Parallel::cut_gsl(MyList *&A) +{ + if (!A) + return; + + MyList *B, *C, *D = A; + bool flag = false; + while (D->next) + { + B = D->next; + while (B) + { + flag = cut_gs(D, B, C); + if (flag) + break; + B = B->next; + } + if (flag) + break; + D = D->next; + } + + if (flag) + { + // delete D and B from A + MyList *E = A; + while (E->next) + { + MyList *tp = E->next; + if (D == tp || B == tp) + { + E->next = (tp->next) ? tp->next : 0; + delete tp->data; + delete tp; + } + if (E->next) + E = E->next; + } + + if (D == A) + { + MyList *tp = A; + A = (A->next) ? 
A->next : 0; + delete tp->data; + delete tp; + } + // cat C to A + if (A) + A->catList(C); + else + A = C; + + cut_gsl(A); + } +} +// when D and B have overlap, cut them into C and return true +// otherwise return false and C=0 +bool Parallel::cut_gs(MyList *D, MyList *B, MyList *&C) +{ + C = 0; + double llb[dim], uub[dim], DH[dim]; + for (int i = 0; i < dim; i++) + { + double tdh; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH[i] = (D->data->uub[i] - D->data->llb[i]) / (D->data->shape[i] - 1); + tdh = (B->data->uub[i] - B->data->llb[i]) / (B->data->shape[i] - 1); +#else +#ifdef Cell + DH[i] = (D->data->uub[i] - D->data->llb[i]) / D->data->shape[i]; + tdh = (B->data->uub[i] - B->data->llb[i]) / B->data->shape[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (!feq(DH[i], tdh, DH[i] / 2)) + { + cout << "Parallel::cut_gs meets different grid segment " << DH[i] << " vs " << tdh << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + llb[i] = Mymax(D->data->llb[i], B->data->llb[i]); + uub[i] = Mymin(D->data->uub[i], B->data->uub[i]); + // for efficiency we ask the width of the patch at least 2(buffer+ghost+BD ghost) + if (uub[i] - llb[i] < DH[i] * 2 * (buffer_width + 2 * ghost_width)) + return false; // here this is valid for both vertex and cell + } + + // this part code results in 5 patches generally + + C = new MyList; + C->data = new gridseg; + for (int i = 0; i < dim; i++) + { + C->data->llb[i] = llb[i]; + C->data->uub[i] = uub[i]; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4) + 1; +#else +#ifdef Cell + C->data->shape[i] = int((C->data->uub[i] - C->data->llb[i]) / DH[i] + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + if (D->data->Bg == B->data->Bg) + C->data->Bg = D->data->Bg; + else + C->data->Bg = 0; + + C->next = gs_subtract_virtual(D, C); + + MyList *E = C; + + while (E->next) + E = 
E->next; + + E->next = gs_subtract_virtual(B, C); + + // this part code results in 3 patches generally + /* + C = clone_gsl(D,true); + C->next = gs_subtract_virtual(B,C); + */ + + return true; +} +// note here it is different to real cut, we need leave the cutting edge for both vertex center and cell center +MyList *Parallel::gs_subtract_virtual(MyList *A, MyList *B) +{ + if (!A) + return 0; + if (!B) + return clone_gsl(A, true); + + double cut_plane[2 * dim], DH[dim]; + + for (int i = 0; i < dim; i++) + { + double tdh; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH[i] = (A->data->uub[i] - A->data->llb[i]) / (A->data->shape[i] - 1); + tdh = (B->data->uub[i] - B->data->llb[i]) / (B->data->shape[i] - 1); +#else +#ifdef Cell + DH[i] = (A->data->uub[i] - A->data->llb[i]) / A->data->shape[i]; + tdh = (B->data->uub[i] - B->data->llb[i]) / B->data->shape[i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + if (!feq(DH[i], tdh, DH[i] / 2)) + { + cout << "Parallel::gs_subtract_virtual meets different grid segment " << DH[i] << " vs " << tdh << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + + MyList *C = 0, *q; + for (int i = 0; i < dim; i++) + { + if (B->data->llb[i] > A->data->uub[i] || B->data->uub[i] < A->data->llb[i]) + return clone_gsl(A, true); + cut_plane[i] = A->data->llb[i]; + cut_plane[i + dim] = A->data->uub[i]; + } + + for (int i = 0; i < dim; i++) + { + cut_plane[i] = Mymax(A->data->llb[i], B->data->llb[i]); + if (cut_plane[i] > A->data->llb[i]) + { + q = clone_gsl(A, true); + // prolong the list from head + if (C) + q->next = C; + C = q; + for (int j = 0; j < dim; j++) + { + if (i == j) + { + C->data->llb[i] = A->data->llb[i]; + // **note here it is different to real cut, we need leave the cutting edge for both vertex center and cell center** + C->data->uub[i] = Mymax(C->data->llb[i], cut_plane[i]); + } + else + { + C->data->llb[j] = cut_plane[j]; + C->data->uub[j] = cut_plane[j + dim]; + } +#ifdef Vertex +#ifdef 
Cell +#error Both Cell and Vertex are defined +#endif + C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1; +#else +#ifdef Cell + C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + } + + cut_plane[i + dim] = Mymin(A->data->uub[i], B->data->uub[i]); + if (cut_plane[i + dim] < A->data->uub[i]) + { + q = clone_gsl(A, true); + if (C) + q->next = C; + C = q; + for (int j = 0; j < dim; j++) + { + if (i == j) + { + C->data->uub[i] = A->data->uub[i]; + // note here it is different to real cut, we need leave the cutting edge for both vertex center and cell center + C->data->llb[i] = Mymin(C->data->uub[i], cut_plane[i + dim]); + } + else + { + C->data->llb[j] = cut_plane[j]; + C->data->uub[j] = cut_plane[j + dim]; + } +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4) + 1; +#else +#ifdef Cell + C->data->shape[j] = int((C->data->uub[j] - C->data->llb[j]) / DH[j] + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + } + } + } + return C; +} +// note the data structure +// if CC is true +// 1 ----------- 1 ------ ^ +// 0 ------ | t +// 0 ----------- old ------ | +// +// old ----------- +// if CC is false +// 1 ----------- 1 ------ ^ +// 0 ----------- 0 ------ | t +// old ----------- old ------ | +void Parallel::fill_level_data(MyList *PatLd, MyList *PatLs, MyList *PatcL, + MyList *OldList, MyList *StateList, MyList *FutureList, + MyList *tmList, int Symmetry, bool BB, bool CC) +{ + if (PatLd->data->lev != PatLs->data->lev) + { + cout << "Parallel::fill_level_data: meet requst from lev#" << PatLs->data->lev << " to lev#" << PatLd->data->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + if (PatLd->data->lev <= PatcL->data->lev) + { + cout << "Parallel::fill_level_data: meet prolong requst from lev#" << PatcL->data->lev << " to lev#" << 
PatLd->data->lev << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + int cpusize; + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + + MyList *VarList = 0; + MyList *p; + p = StateList; + while (p) + { + if (VarList) + VarList->insert(p->data); + else + VarList = new MyList(p->data); + p = p->next; + } + p = FutureList; + while (p) + { + if (VarList) + VarList->insert(p->data); + else + VarList = new MyList(p->data); + p = p->next; + } + + MyList *dst; + MyList **src, **transfer_src, **transfer_dst; + src = new MyList *[cpusize]; + transfer_src = new MyList *[cpusize]; + transfer_dst = new MyList *[cpusize]; + + dst = build_complete_gsl(PatLd); // including ghost + // copy part + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl(PatLs, node, 0, Symmetry); // similar to Sync + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); + + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + MyList *dsts, *dstd; + dsts = build_complete_gsl_virtual(PatLs); + dstd = dst; + dst = gsl_subtract(dstd, dsts); + if (dstd) + dstd->destroyList(); + if (dsts) + dsts->destroyList(); + + if (dst) + { + // prolongation part + for (int node = 0; node < cpusize; node++) + { + src[node] = build_owned_gsl(PatcL, node, 4, Symmetry); // - buffer - ghost - BD ghost + build_gstl(src[node], dst, &transfer_src[node], &transfer_dst[node]); // for transfer[node], data locate on cpu#node + } + + if (CC) + { + // for FutureList + // restrict first~~~> + { + Restrict(PatcL, PatLs, FutureList, FutureList, Symmetry); + Sync(PatcL, FutureList, Symmetry); + } + //<~~~prolong then + transfer(transfer_src, transfer_dst, FutureList, FutureList, Symmetry); + + // for StateList + // time 
interpolation part + if (BB) + prepare_inter_time_level(PatcL, FutureList, StateList, OldList, + tmList, 0); // use SynchList_pre as temporal storage space + else + prepare_inter_time_level(PatcL, FutureList, StateList, + tmList, 0); // use SynchList_pre as temporal storage space + // restrict first~~~> + { + Restrict(PatcL, PatLs, StateList, tmList, Symmetry); + Sync(PatcL, tmList, Symmetry); + } + //<~~~prolong then + transfer(transfer_src, transfer_dst, tmList, StateList, Symmetry); + } + else + { + // for both FutureList and StateList + // restrict first~~~> + { + Restrict(PatcL, PatLs, VarList, VarList, Symmetry); + Sync(PatcL, VarList, Symmetry); + } + //<~~~prolong then + transfer(transfer_src, transfer_dst, VarList, VarList, Symmetry); + } + + for (int node = 0; node < cpusize; node++) + { + if (src[node]) + src[node]->destroyList(); + if (transfer_src[node]) + transfer_src[node]->destroyList(); + if (transfer_dst[node]) + transfer_dst[node]->destroyList(); + } + + dst->destroyList(); + } + + delete[] src; + delete[] transfer_src; + delete[] transfer_dst; + + VarList->clearList(); +} +void Parallel::KillBlocks(MyList *PatchLIST) +{ + while (PatchLIST) + { + Patch *Pp = PatchLIST->data; + MyList *bg; + while (Pp->blb) + { + if (Pp->blb == Pp->ble) + break; + bg = (Pp->blb->next) ? 
Pp->blb->next : 0; + delete Pp->blb->data; + delete Pp->blb; + Pp->blb = bg; + } + if (Pp->ble) + { + delete Pp->ble->data; + delete Pp->ble; + } + Pp->blb = Pp->ble = 0; + PatchLIST = PatchLIST->next; + } +} +bool Parallel::PatList_Interp_Points(MyList *PatL, MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry) +{ + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + double lld[dim], uud[dim]; + double **pox; + pox = new double *[dim]; + for (int j = 0; j < dim; j++) + pox[j] = new double[1]; + for (int i = 0; i < NN; i++) + { + MyList *PL = PatL; + while (PL) + { + bool flag = true; + for (int j = 0; j < dim; j++) + { + double h = PL->data->getdX(j); + lld[j] = PL->data->lli[j] * h; + uud[j] = PL->data->uui[j] * h; + if (XX[j][i] < PL->data->bbox[j] + lld[j] || XX[j][i] > PL->data->bbox[j + dim] - uud[j]) + { + flag = false; + break; + } + pox[j][0] = XX[j][i]; + } + if (flag) + { + PL->data->Interp_Points(VarList, 1, pox, Shellf + i * num_var, Symmetry); + break; + } + PL = PL->next; + } + if (!PL) + { + checkpatchlist(PatL, false); + return false; + } + } + for (int j = 0; j < dim; j++) + delete[] pox[j]; + delete[] pox; + + return true; +} +bool Parallel::PatList_Interp_Points(MyList *PatL, MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry, MPI_Comm Comm_here) +{ + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + double lld[dim], uud[dim]; + double **pox; + pox = new double *[dim]; + for (int j = 0; j < dim; j++) + pox[j] = new double[1]; + for (int i = 0; i < NN; i++) + { + MyList *PL = PatL; + while (PL) + { + bool flag = true; + for (int j = 0; j < dim; j++) + { + double h = PL->data->getdX(j); + lld[j] = PL->data->lli[j] * h; + uud[j] = PL->data->uui[j] * h; + if (XX[j][i] < PL->data->bbox[j] + lld[j] || XX[j][i] > PL->data->bbox[j + dim] - uud[j]) + { + flag = false; + break; + } + pox[j][0] = 
XX[j][i]; + } + if (flag) + { + PL->data->Interp_Points(VarList, 1, pox, Shellf + i * num_var, Symmetry, Comm_here); + break; + } + PL = PL->next; + } + if (!PL) + { + checkpatchlist(PatL, false); + return false; + } + } + for (int j = 0; j < dim; j++) + delete[] pox[j]; + delete[] pox; + + return true; +} +void Parallel::aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape) +{ + const double aligntiny = 0.1; + double DHl, rr; + int NN; + for (int i = 0; i < dim; i++) + { + DHl = DH0[i] * pow(0.5, lev); + rr = bboxl[i] - bbox0[i]; + bboxl[i] = bbox0[i] + int(rr / DHl + 0.4) * DHl; + rr = bbox0[i + dim] - bboxl[i + dim]; + bboxl[i + dim] = bbox0[i + dim] - int(rr / DHl + 0.4) * DHl; +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + NN = int((bboxl[i + dim] - bboxl[i]) / DHl + 0.4) + 1; +#else +#ifdef Cell + NN = int((bboxl[i + dim] - bboxl[i]) / DHl + 0.4); +#else +#error Not define Vertex nor Cell +#endif +#endif + if (NN != shape[i]) + { + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) + { + cout << "Parallel::aligncheck want shape " << NN << " for lev#" << lev << ", but " << shape[i] << endl; + cout << "i = " << i << ", low = " << bboxl[i] << ", up = " << bboxl[i + dim] << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + } +} +bool Parallel::point_locat_gsl(double *pox, MyList *gsl) +{ + bool flag = false; + while (gsl) + { + for (int i = 0; i < dim; i++) + { + if (pox[i] > gsl->data->llb[i] && pox[i] < gsl->data->uub[i]) + flag = true; + else + { + flag = false; + break; + } + } + if (flag) + break; + gsl = gsl->next; + } + + return flag; +} +void Parallel::checkpatchlist(MyList *PatL, bool buflog) +{ + MyList *PL = PatL; + while (PL) + { + PL->data->checkPatch(buflog); + PL = PL->next; + } +} +// Check if load balancing is needed based on interpolation times +bool Parallel::check_load_balance_need(double *rank_times, int nprocs, int &num_heavy, int *heavy_ranks) +{ + // Calculate average 
time + double avg_time = 0; + for (int r = 0; r < nprocs; r++) + { + avg_time += rank_times[r]; + } + avg_time /= nprocs; + + // Identify heavy ranks (time > 1.5x average) + std::vector> rank_times_vec; + for (int r = 0; r < nprocs; r++) + { + if (rank_times[r] > avg_time * 1.5) + { + rank_times_vec.push_back(std::make_pair(r, rank_times[r])); + } + } + + // Sort by time (descending) + std::sort(rank_times_vec.begin(), rank_times_vec.end(), + [](const std::pair& a, const std::pair& b) { + return a.second > b.second; + }); + + // Take top 4 heavy ranks + num_heavy = std::min(4, (int)rank_times_vec.size()); + if (num_heavy > 0) + { + for (int i = 0; i < num_heavy; i++) + { + heavy_ranks[i] = rank_times_vec[i].first; + } + return true; // Load balancing is needed + } + + return false; // No load balancing needed +} + +// Split blocks belonging to heavy ranks to improve load balancing +// Strategy: Split heavy rank blocks in half, merge 8 light ranks to free 4 ranks +void Parallel::split_heavy_blocks(MyList *PatL, int *heavy_ranks, int num_heavy, + int split_factor, int cpusize, int ingfsi, int fngfsi) +{ + int myrank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + if (myrank != 0) return; // Only rank 0 performs the analysis + + cout << "\n=== Load Balancing Strategy ===" << endl; + cout << "Heavy ranks to split (in half): " << num_heavy << endl; + for (int i = 0; i < num_heavy; i++) + cout << " Heavy rank " << heavy_ranks[i] << endl; + + // Step 1: Identify all blocks and their ranks + std::vector all_ranks; + std::map> rank_to_blocks; + + MyList *PL = PatL; + while (PL) + { + Patch *PP = PL->data; + MyList *BP = PP->blb; + while (BP) + { + Block *block = BP->data; + all_ranks.push_back(block->rank); + rank_to_blocks[block->rank].push_back(block); + BP = BP->next; + } + PL = PL->next; + } + + // Step 2: Identify light ranks (not in heavy_ranks list) + std::set heavy_set(heavy_ranks, heavy_ranks + num_heavy); + 
std::vector light_ranks; + for (int r : all_ranks) + { + if (heavy_set.find(r) == heavy_set.end()) + { + light_ranks.push_back(r); + } + } + + // Remove duplicates from light_ranks + std::sort(light_ranks.begin(), light_ranks.end()); + light_ranks.erase(std::unique(light_ranks.begin(), light_ranks.end()), light_ranks.end()); + + cout << "Found " << light_ranks.size() << " light ranks (candidates for merging)" << endl; + + // Step 3: Select 8 light ranks to merge (those with smallest workload) + // For now, we select the first 8 light ranks + int num_to_merge = 8; + if (light_ranks.size() < num_to_merge) + { + cout << "WARNING: Not enough light ranks to merge. Found " << light_ranks.size() + << ", need " << num_to_merge << endl; + num_to_merge = light_ranks.size(); + } + + std::vector ranks_to_merge(light_ranks.begin(), light_ranks.begin() + num_to_merge); + + cout << "Light ranks to merge (8 -> 4 merged ranks):" << endl; + for (int i = 0; i < num_to_merge; i++) + cout << " Rank " << ranks_to_merge[i] << endl; + + // Step 4: Analyze blocks that need to be split + cout << "\n=== Analyzing blocks for splitting ===" << endl; + + struct BlockSplitInfo { + Block *original_block; + int split_dim; + int split_point; + }; + + std::vector blocks_to_split; + + PL = PatL; + while (PL) + { + Patch *PP = PL->data; + MyList *BP = PP->blb; + while (BP) + { + Block *block = BP->data; + + // Check if this block belongs to a heavy rank + for (int i = 0; i < num_heavy; i++) + { + if (block->rank == heavy_ranks[i]) + { + // Find the largest dimension for splitting + int max_dim = 0; + int max_size = block->shape[0]; + for (int d = 1; d < dim; d++) + { + if (block->shape[d] > max_size) + { + max_size = block->shape[d]; + max_dim = d; + } + } + + int split_point = max_size / 2; + + BlockSplitInfo info; + info.original_block = block; + info.split_dim = max_dim; + info.split_point = split_point; + blocks_to_split.push_back(info); + + cout << "Block at rank " << block->rank << " will be 
split" << endl; + cout << " Shape: [" << block->shape[0] << ", " << block->shape[1] << ", " << block->shape[2] << "]" << endl; + cout << " Split along dimension " << max_dim << " at index " << split_point << endl; + break; + } + } + + BP = BP->next; + } + PL = PL->next; + } + + cout << "\nTotal blocks to split: " << blocks_to_split.size() << endl; + + // Step 5: Calculate new rank assignments + // Strategy: + // - For each heavy rank, its blocks are split in half + // - First half keeps the original rank + // - Second half gets a new rank (from the freed light ranks) + // - 8 light ranks are merged into 4 ranks, freeing up 4 ranks + + std::vector freed_ranks; + for (size_t i = 0; i < ranks_to_merge.size(); i += 2) + { + // Merge pairs of light ranks: (ranks_to_merge[i], ranks_to_merge[i+1]) -> ranks_to_merge[i] + // This frees up ranks_to_merge[i+1] + if (i + 1 < ranks_to_merge.size()) + { + freed_ranks.push_back(ranks_to_merge[i + 1]); + cout << "Merging ranks " << ranks_to_merge[i] << " and " << ranks_to_merge[i + 1] + << " -> keeping rank " << ranks_to_merge[i] << ", freeing rank " << ranks_to_merge[i + 1] << endl; + } + } + + cout << "\nFreed ranks available for split blocks: "; + for (int r : freed_ranks) + cout << r << " "; + cout << endl; + + // Step 6: Assign new ranks to split blocks + int freed_idx = 0; + for (size_t i = 0; i < blocks_to_split.size(); i++) + { + BlockSplitInfo &info = blocks_to_split[i]; + Block *original = info.original_block; + + if (freed_idx < freed_ranks.size()) + { + cout << "\nSplitting block at rank " << original->rank << endl; + cout << " First half: keeps rank " << original->rank << endl; + cout << " Second half: gets new rank " << freed_ranks[freed_idx] << endl; + freed_idx++; + } + else + { + cout << "WARNING: Not enough freed ranks for all split blocks!" << endl; + break; + } + } + + cout << "\n=== Load Balancing Analysis Complete ===" << endl; + cout << "Next steps:" << endl; + cout << " 1. 
Recompose the grid with new rank assignments" << endl; + cout << " 2. Data migration will be handled by recompose_cgh" << endl; + cout << " 3. Ghost zone communication will be updated automatically" << endl; +} + diff --git a/AMSS_NCKU_source/Parallel.h b/AMSS_NCKU_source/Parallel.h index a6ef351..7538958 100644 --- a/AMSS_NCKU_source/Parallel.h +++ b/AMSS_NCKU_source/Parallel.h @@ -1,213 +1,235 @@ - -#ifndef PARALLEL_H -#define PARALLEL_H - -#include -#include -#include -#include -#include -#include -#include -#include -using namespace std; - -#include "Parallel_bam.h" -#include "var.h" -#include "MPatch.h" -#include "Block.h" -#include "MyList.h" -#include "macrodef.h" //need dim; ghost_width; CONTRACT -namespace Parallel -{ - struct gridseg - { - double llb[dim]; - double uub[dim]; - int shape[dim]; - double illb[dim], iuub[dim]; // only use for OutBdLow2Hi - Block *Bg; - }; - int partition1(int &nx, int split_size, int min_width, int cpusize, int shape); // special for 1 diemnsion - int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions - int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape); - MyList *distribute(MyList *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks - void KillBlocks(MyList *PatchLIST); - - void setfunction(MyList *BlL, var *vn, double func(double x, double y, double z)); - void setfunction(int rank, MyList *BlL, var *vn, double func(double x, double y, double z)); - void writefile(double time, int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax, - double zmin, double zmax, char *filename, double *data_out); - void writefile(double time, int nx, int ny, double xmin, double xmax, double ymin, double ymax, - char *filename, double *datain); - void getarrayindex(int DIM, int *shape, int *index, int n); - int getarraylocation(int DIM, int *shape, int *index); - void copy(int DIM, 
double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin, - int *shape, double *datain, double *llb, double *uub); - void Dump_CPU_Data(MyList *BlL, MyList *DumpList, char *tag, double time, double dT); - void Dump_Data(MyList *PL, MyList *DumpList, char *tag, double time, double dT); - void Dump_Data(Patch *PP, MyList *DumpList, char *tag, double time, double dT, int grd); - double *Collect_Data(Patch *PP, var *VP); - void d2Dump_Data(MyList *PL, MyList *DumpList, char *tag, double time, double dT); - void d2Dump_Data(Patch *PP, MyList *DumpList, char *tag, double time, double dT, int grd); - void Dump_Data0(Patch *PP, MyList *DumpList, char *tag, double time, double dT); - double global_interp(int DIM, int *ext, double **CoX, double *datain, - double *poX, int ordn, double *SoA, int Symmetry); - double global_interp(int DIM, int *ext, double **CoX, double *datain, - double *poX, int ordn); - double Lagrangian_Int(double x, int npts, double *xpts, double *funcvals); - double LagrangePoly(double x, int pt, int npts, double *xpts); - MyList *build_complete_gsl(Patch *Pat); - MyList *build_complete_gsl(MyList *PatL); - MyList *build_complete_gsl_virtual(MyList *PatL); - MyList *build_complete_gsl_virtual2(MyList *PatL); // - buffer - MyList *build_owned_gsl0(Patch *Pat, int rank_in); // - ghost without extension, special for Sync usage - MyList *build_owned_gsl1(Patch *Pat, int rank_in); // - ghost, similar to build_owned_gsl0 but extend one point on left side for vertex grid - MyList *build_owned_gsl2(Patch *Pat, int rank_in); // - buffer - ghost - MyList *build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry); // - ghost - BD ghost - MyList *build_owned_gsl4(Patch *Pat, int rank_in, int Symmetry); // - buffer - ghost - BD ghost - MyList *build_owned_gsl5(Patch *Pat, int rank_in); // similar to build_owned_gsl2 but no extension - MyList *build_owned_gsl(MyList *PatL, int rank_in, int type, int Symmetry); - void build_gstl(MyList *srci, 
MyList *dsti, MyList **out_src, MyList **out_dst); - int data_packer(double *data, MyList *src, MyList *dst, int rank_in, int dir, - MyList *VarLists, MyList *VarListd, int Symmetry); - void transfer(MyList **src, MyList **dst, - MyList *VarList1 /* source */, MyList *VarList2 /*target */, - int Symmetry); - int data_packermix(double *data, MyList *src, MyList *dst, int rank_in, int dir, - MyList *VarLists, MyList *VarListd, int Symmetry); - void transfermix(MyList **src, MyList **dst, - MyList *VarList1 /* source */, MyList *VarList2 /*target */, - int Symmetry); - void Sync(Patch *Pat, MyList *VarList, int Symmetry); - void Sync(MyList *PatL, MyList *VarList, int Symmetry); - void Sync_merged(MyList *PatL, MyList *VarList, int Symmetry); - - struct SyncCache { - bool valid; - int cpusize; - MyList **combined_src; - MyList **combined_dst; - int *send_lengths; - int *recv_lengths; - double **send_bufs; - double **recv_bufs; - int *send_buf_caps; - int *recv_buf_caps; - MPI_Request *reqs; - MPI_Status *stats; - int max_reqs; - bool lengths_valid; - SyncCache(); - void invalidate(); - void destroy(); - }; - - void Sync_cached(MyList *PatL, MyList *VarList, int Symmetry, SyncCache &cache); - void transfer_cached(MyList **src, MyList **dst, - MyList *VarList1, MyList *VarList2, - int Symmetry, SyncCache &cache); - - struct AsyncSyncState { - int req_no; - bool active; - AsyncSyncState() : req_no(0), active(false) {} - }; - - void Sync_start(MyList *PatL, MyList *VarList, int Symmetry, - SyncCache &cache, AsyncSyncState &state); - void Sync_finish(SyncCache &cache, AsyncSyncState &state, - MyList *VarList, int Symmetry); - void OutBdLow2Hi(Patch *Patc, Patch *Patf, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry); - void OutBdLow2Hi(MyList *PatcL, MyList *PatfL, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry); - void OutBdLow2Himix(Patch *Patc, Patch *Patf, - MyList *VarList1 /* source */, MyList *VarList2 
/* target */, - int Symmetry); - void OutBdLow2Himix(MyList *PatcL, MyList *PatfL, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry); - void Restrict_cached(MyList *PatcL, MyList *PatfL, - MyList *VarList1, MyList *VarList2, - int Symmetry, SyncCache &cache); - void OutBdLow2Hi_cached(MyList *PatcL, MyList *PatfL, - MyList *VarList1, MyList *VarList2, - int Symmetry, SyncCache &cache); - void OutBdLow2Himix_cached(MyList *PatcL, MyList *PatfL, - MyList *VarList1, MyList *VarList2, - int Symmetry, SyncCache &cache); - void Prolong(Patch *Patc, Patch *Patf, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry); - void Prolongint(Patch *Patc, Patch *Patf, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry); - void Restrict(MyList *PatcL, MyList *PatfL, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry); - void Restrict_after(MyList *PatcL, MyList *PatfL, - MyList *VarList1 /* source */, MyList *VarList2 /* target */, - int Symmetry); // for -ghost - BDghost - MyList *build_PhysBD_gsl(Patch *Pat); - MyList *build_ghost_gsl(MyList *PatL); - MyList *build_ghost_gsl(Patch *Pat); - MyList *build_buffer_gsl(Patch *Pat); - MyList *build_buffer_gsl(MyList *PatL); - MyList *gsl_subtract(MyList *A, MyList *B); - MyList *gs_subtract(MyList *A, MyList *B); - MyList *gsl_and(MyList *A, MyList *B); - MyList *gs_and(MyList *A, MyList *B); - MyList *clone_gsl(MyList *p, bool first_only); - MyList *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue - MyList *build_bulk_gsl(Block *bp, Patch *Pat); - void build_PhysBD_gstl(Patch *Pat, MyList *srci, MyList *dsti, - MyList **out_src, MyList **out_dst); - void PeriodicBD(Patch *Pat, MyList *VarList, int Symmetry); - double L2Norm(Patch *Pat, var *vf); - void checkgsl(MyList *pp, bool first_only); - void checkvarl(MyList *pp, bool first_only); - MyList *divide_gsl(MyList *p, Patch *Pat); - 
MyList *divide_gs(MyList *p, Patch *Pat); - void prepare_inter_time_level(Patch *Pat, - MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, - MyList *VarList3 /* target (t+a*dt) */, int tindex); - void prepare_inter_time_level(Patch *Pat, - MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, - MyList *VarList3 /* source (t-dt) */, MyList *VarList4 /* target (t+a*dt) */, int tindex); - void prepare_inter_time_level(MyList *PatL, - MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, - MyList *VarList3 /* target (t+a*dt) */, int tindex); - void prepare_inter_time_level(MyList *Pat, - MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, - MyList *VarList3 /* source (t-dt) */, MyList *VarList4 /* target (t+a*dt) */, int tindex); - void merge_gsl(MyList *&A, const double ratio); - bool merge_gs(MyList *D, MyList *B, MyList *&C, const double ratio); - // Add ghost region to tangent plane - // we assume the grids have the same resolution - void add_ghost_touch(MyList *&A); - void cut_gsl(MyList *&A); - bool cut_gs(MyList *D, MyList *B, MyList *&C); - MyList *gs_subtract_virtual(MyList *A, MyList *B); - void fill_level_data(MyList *PatLd, MyList *PatLs, MyList *PatcL, - MyList *OldList, MyList *StateList, MyList *FutureList, - MyList *tmList, int Symmetry, bool BB, bool CC); - bool PatList_Interp_Points(MyList *PatL, MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetry); - void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape); - bool point_locat_gsl(double *pox, MyList *gsl); - void checkpatchlist(MyList *PatL, bool buflog); - - double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here); - bool PatList_Interp_Points(MyList *PatL, MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetry, MPI_Comm Comm_here); -#if (PSTR == 1 || PSTR == 2 || PSTR == 3) - MyList *distribute(MyList *PatchLIST, int cpusize, int ingfsi, int fngfsi, - bool 
periodic, int start_rank, int end_rank, int nodes = 0); -#endif -} -#endif /*PARALLEL_H */ + +#ifndef PARALLEL_H +#define PARALLEL_H + +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; +#include +#include "Parallel_bam.h" +#include "var.h" +#include "MPatch.h" +#include "Block.h" +#include "MyList.h" +#include "macrodef.h" //need dim; ghost_width; CONTRACT +namespace Parallel +{ + struct gridseg + { + double llb[dim]; + double uub[dim]; + int shape[dim]; + double illb[dim], iuub[dim]; // only use for OutBdLow2Hi + Block *Bg; + }; + int partition1(int &nx, int split_size, int min_width, int cpusize, int shape); // special for 1 diemnsion + int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions + int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape); + MyList *distribute(MyList *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks + MyList *distribute_hard(MyList *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks + Block* splitHotspotBlock(MyList* &BlL, int _dim, + int ib0_orig, int ib3_orig, + int jb1_orig, int jb4_orig, + int kb2_orig, int kb5_orig, + Patch* PP, int r_left, int r_right, + int ingfsi, int fngfsi, bool periodic, + Block* &split_first_block, Block* &split_last_block); + Block* createMappedBlock(MyList* &BlL, int _dim, int* shape, double* bbox, + int block_id, int ingfsi, int fngfsi, int lev); + void KillBlocks(MyList *PatchLIST); + + void setfunction(MyList *BlL, var *vn, double func(double x, double y, double z)); + void setfunction(int rank, MyList *BlL, var *vn, double func(double x, double y, double z)); + void writefile(double time, int nx, int ny, int nz, double xmin, double xmax, double ymin, double ymax, + double zmin, double zmax, char *filename, double *data_out); + void writefile(double 
time, int nx, int ny, double xmin, double xmax, double ymin, double ymax, + char *filename, double *datain); + void getarrayindex(int DIM, int *shape, int *index, int n); + int getarraylocation(int DIM, int *shape, int *index); + void copy(int DIM, double *llbout, double *uubout, int *Dshape, double *DD, double *llbin, double *uubin, + int *shape, double *datain, double *llb, double *uub); + void Dump_CPU_Data(MyList *BlL, MyList *DumpList, char *tag, double time, double dT); + void Dump_Data(MyList *PL, MyList *DumpList, char *tag, double time, double dT); + void Dump_Data(Patch *PP, MyList *DumpList, char *tag, double time, double dT, int grd); + double *Collect_Data(Patch *PP, var *VP); + void d2Dump_Data(MyList *PL, MyList *DumpList, char *tag, double time, double dT); + void d2Dump_Data(Patch *PP, MyList *DumpList, char *tag, double time, double dT, int grd); + void Dump_Data0(Patch *PP, MyList *DumpList, char *tag, double time, double dT); + double global_interp(int DIM, int *ext, double **CoX, double *datain, + double *poX, int ordn, double *SoA, int Symmetry); + double global_interp(int DIM, int *ext, double **CoX, double *datain, + double *poX, int ordn); + double Lagrangian_Int(double x, int npts, double *xpts, double *funcvals); + double LagrangePoly(double x, int pt, int npts, double *xpts); + MyList *build_complete_gsl(Patch *Pat); + MyList *build_complete_gsl(MyList *PatL); + MyList *build_complete_gsl_virtual(MyList *PatL); + MyList *build_complete_gsl_virtual2(MyList *PatL); // - buffer + MyList *build_owned_gsl0(Patch *Pat, int rank_in); // - ghost without extension, special for Sync usage + MyList *build_owned_gsl1(Patch *Pat, int rank_in); // - ghost, similar to build_owned_gsl0 but extend one point on left side for vertex grid + MyList *build_owned_gsl2(Patch *Pat, int rank_in); // - buffer - ghost + MyList *build_owned_gsl3(Patch *Pat, int rank_in, int Symmetry); // - ghost - BD ghost + MyList *build_owned_gsl4(Patch *Pat, int rank_in, int 
Symmetry); // - buffer - ghost - BD ghost + MyList *build_owned_gsl5(Patch *Pat, int rank_in); // similar to build_owned_gsl2 but no extension + MyList *build_owned_gsl(MyList *PatL, int rank_in, int type, int Symmetry); + void build_gstl(MyList *srci, MyList *dsti, MyList **out_src, MyList **out_dst); + int data_packer(double *data, MyList *src, MyList *dst, int rank_in, int dir, + MyList *VarLists, MyList *VarListd, int Symmetry); + void transfer(MyList **src, MyList **dst, + MyList *VarList1 /* source */, MyList *VarList2 /*target */, + int Symmetry); + int data_packermix(double *data, MyList *src, MyList *dst, int rank_in, int dir, + MyList *VarLists, MyList *VarListd, int Symmetry); + void transfermix(MyList **src, MyList **dst, + MyList *VarList1 /* source */, MyList *VarList2 /*target */, + int Symmetry); + void Sync(Patch *Pat, MyList *VarList, int Symmetry); + void Sync(MyList *PatL, MyList *VarList, int Symmetry); + void Sync_merged(MyList *PatL, MyList *VarList, int Symmetry); + + struct SyncCache { + bool valid; + int cpusize; + MyList **combined_src; + MyList **combined_dst; + int *send_lengths; + int *recv_lengths; + double **send_bufs; + double **recv_bufs; + int *send_buf_caps; + int *recv_buf_caps; + MPI_Request *reqs; + MPI_Status *stats; + int max_reqs; + bool lengths_valid; + SyncCache(); + void invalidate(); + void destroy(); + }; + + void Sync_cached(MyList *PatL, MyList *VarList, int Symmetry, SyncCache &cache); + void transfer_cached(MyList **src, MyList **dst, + MyList *VarList1, MyList *VarList2, + int Symmetry, SyncCache &cache); + + struct AsyncSyncState { + int req_no; + bool active; + AsyncSyncState() : req_no(0), active(false) {} + }; + + void Sync_start(MyList *PatL, MyList *VarList, int Symmetry, + SyncCache &cache, AsyncSyncState &state); + void Sync_finish(SyncCache &cache, AsyncSyncState &state, + MyList *VarList, int Symmetry); + void OutBdLow2Hi(Patch *Patc, Patch *Patf, + MyList *VarList1 /* source */, MyList *VarList2 /* 
target */, + int Symmetry); + void OutBdLow2Hi(MyList *PatcL, MyList *PatfL, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry); + void OutBdLow2Himix(Patch *Patc, Patch *Patf, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry); + void OutBdLow2Himix(MyList *PatcL, MyList *PatfL, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry); + void Restrict_cached(MyList *PatcL, MyList *PatfL, + MyList *VarList1, MyList *VarList2, + int Symmetry, SyncCache &cache); + void OutBdLow2Hi_cached(MyList *PatcL, MyList *PatfL, + MyList *VarList1, MyList *VarList2, + int Symmetry, SyncCache &cache); + void OutBdLow2Himix_cached(MyList *PatcL, MyList *PatfL, + MyList *VarList1, MyList *VarList2, + int Symmetry, SyncCache &cache); + void Prolong(Patch *Patc, Patch *Patf, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry); + void Prolongint(Patch *Patc, Patch *Patf, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry); + void Restrict(MyList *PatcL, MyList *PatfL, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry); + void Restrict_after(MyList *PatcL, MyList *PatfL, + MyList *VarList1 /* source */, MyList *VarList2 /* target */, + int Symmetry); // for -ghost - BDghost + MyList *build_PhysBD_gsl(Patch *Pat); + MyList *build_ghost_gsl(MyList *PatL); + MyList *build_ghost_gsl(Patch *Pat); + MyList *build_buffer_gsl(Patch *Pat); + MyList *build_buffer_gsl(MyList *PatL); + MyList *gsl_subtract(MyList *A, MyList *B); + MyList *gs_subtract(MyList *A, MyList *B); + MyList *gsl_and(MyList *A, MyList *B); + MyList *gs_and(MyList *A, MyList *B); + MyList *clone_gsl(MyList *p, bool first_only); + MyList *build_bulk_gsl(Patch *Pat); // similar to build_owned_gsl0 but does not care rank issue + MyList *build_bulk_gsl(Block *bp, Patch *Pat); + void build_PhysBD_gstl(Patch *Pat, MyList *srci, MyList *dsti, + MyList **out_src, MyList 
**out_dst); + void PeriodicBD(Patch *Pat, MyList *VarList, int Symmetry); + double L2Norm(Patch *Pat, var *vf); + void checkgsl(MyList *pp, bool first_only); + void checkvarl(MyList *pp, bool first_only); + MyList *divide_gsl(MyList *p, Patch *Pat); + MyList *divide_gs(MyList *p, Patch *Pat); + void prepare_inter_time_level(Patch *Pat, + MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, + MyList *VarList3 /* target (t+a*dt) */, int tindex); + void prepare_inter_time_level(Patch *Pat, + MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, + MyList *VarList3 /* source (t-dt) */, MyList *VarList4 /* target (t+a*dt) */, int tindex); + void prepare_inter_time_level(MyList *PatL, + MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, + MyList *VarList3 /* target (t+a*dt) */, int tindex); + void prepare_inter_time_level(MyList *Pat, + MyList *VarList1 /* source (t+dt) */, MyList *VarList2 /* source (t) */, + MyList *VarList3 /* source (t-dt) */, MyList *VarList4 /* target (t+a*dt) */, int tindex); + void merge_gsl(MyList *&A, const double ratio); + bool merge_gs(MyList *D, MyList *B, MyList *&C, const double ratio); + // Add ghost region to tangent plane + // we assume the grids have the same resolution + void add_ghost_touch(MyList *&A); + void cut_gsl(MyList *&A); + bool cut_gs(MyList *D, MyList *B, MyList *&C); + MyList *gs_subtract_virtual(MyList *A, MyList *B); + void fill_level_data(MyList *PatLd, MyList *PatLs, MyList *PatcL, + MyList *OldList, MyList *StateList, MyList *FutureList, + MyList *tmList, int Symmetry, bool BB, bool CC); + bool PatList_Interp_Points(MyList *PatL, MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry); + void aligncheck(double *bbox0, double *bboxl, int lev, double *DH0, int *shape); + bool point_locat_gsl(double *pox, MyList *gsl); + void checkpatchlist(MyList *PatL, bool buflog); + + double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here); + bool 
PatList_Interp_Points(MyList *PatL, MyList *VarList, + int NN, double **XX, + double *Shellf, int Symmetry, MPI_Comm Comm_here); +#if (PSTR == 1 || PSTR == 2 || PSTR == 3) + MyList *distribute(MyList *PatchLIST, int cpusize, int ingfsi, int fngfsi, + bool periodic, int start_rank, int end_rank, int nodes = 0); + + // Redistribute blocks with time statistics for load balancing + MyList *distribute(MyList *PatchLIST, MyList *OldBlockL, + int cpusize, int ingfsi, int fngfsi, + bool periodic, int start_rank, int end_rank, int nodes = 0); +#endif + + // Dynamic load balancing: split blocks for heavy ranks + void split_heavy_blocks(MyList *PatL, int *heavy_ranks, int num_heavy, + int split_factor, int cpusize, int ingfsi, int fngfsi); + + // Check if load balancing is needed based on interpolation times + bool check_load_balance_need(double *rank_times, int nprocs, int &num_heavy, int *heavy_ranks); + } + #endif /*PARALLEL_H */ diff --git a/AMSS_NCKU_source/cgh.C b/AMSS_NCKU_source/cgh.C index e27ccd6..a72ba42 100644 --- a/AMSS_NCKU_source/cgh.C +++ b/AMSS_NCKU_source/cgh.C @@ -1,1707 +1,1839 @@ - -#ifdef newc -#include -#include -#include -#include -#include -#include -#include -#include -using namespace std; -#else -#include -#include -#include -#include -#include -#include -#include -#include -#endif - -#include - -#include "macrodef.h" -#include "misc.h" -#include "cgh.h" -#include "Parallel.h" -#include "parameters.h" - -//================================================================================================ - -// define cgh class - -//================================================================================================ - -cgh::cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, - monitor *ErrorMonitor) : ingfs(ingfsi), fngfs(fngfsi), trfls(0) -{ -#if (PSTR == 1 || PSTR == 2 || PSTR == 3) - Commlev = 0; - start_rank = 0; - end_rank = 0; -#endif - - if (!checkrun) - { - read_bbox(Symmetry, filename); - sethandle(ErrorMonitor); 
- for (int lev = 0; lev < levels; lev++) - PatL[lev] = construct_patchlist(lev, Symmetry); - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function is the destructor; it releases allocated resources and deletes variables - -//================================================================================================ - -cgh::~cgh() -{ - for (int lev = 0; lev < levels; lev++) - { - for (int grd = 0; grd < grids[lev]; grd++) - { - delete[] bbox[lev][grd]; - delete[] shape[lev][grd]; - delete[] handle[lev][grd]; - } - delete[] bbox[lev]; - delete[] shape[lev]; - delete[] handle[lev]; - Parallel::KillBlocks(PatL[lev]); - PatL[lev]->destroyList(); -#if (RPB == 1) - Parallel::destroypsuList_bam(bdsul[lev]); - Parallel::destroypsuList_bam(rsul[lev]); -#endif - } - delete[] grids; - delete[] Lt; - delete[] bbox; - delete[] shape; - delete[] handle; - delete[] PatL; -#if (RPB == 1) - delete[] bdsul; - delete[] rsul; -#endif - -#if (PSTR == 1 || PSTR == 2 || PSTR == 3) - for (int lev = 0; lev < levels; lev++) - { - MPI_Comm_free(&Commlev[lev]); - } - - if (Commlev) - delete[] Commlev; - if (start_rank) - delete[] start_rank; - if (end_rank) - delete[] end_rank; -#endif - for (int lev = 0; lev < levels; lev++) - { - for (int ibh = 0; ibh < BH_num_in; ibh++) - delete[] Porgls[lev][ibh]; - delete[] Porgls[lev]; - } - delete[] Porgls; -} - -//================================================================================================ - - -//================================================================================================ - -// This member function constructs the computational grid - -//================================================================================================ - -#if (PSTR == 0) -void cgh::compose_cgh(int nprocs) -{ - for (int lev = 0; lev < levels; 
lev++) - { - checkPatchList(PatL[lev], false); - Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false); -#if (RPB == 1) - // we need distributed box of PatL[lev] and PatL[lev-1] - if (lev > 0) - { - Parallel::Constr_pointstr_OutBdLow2Hi(PatL[lev], PatL[lev - 1], bdsul[lev]); - Parallel::Constr_pointstr_Restrict(PatL[lev], PatL[lev - 1], rsul[lev]); - } - else - { - bdsul[lev] = 0; - rsul[lev] = 0; - } -#endif - } -} - -//================================================================================================ - - -//================================================================================================ - -// This member function constructs the computational grid -// For the cases PSTR == 1 and PSTR == 2 - -//================================================================================================ - -#elif (PSTR == 1 || PSTR == 2) -void cgh::compose_cgh(int nprocs) -{ - Commlev = new MPI_Comm[levels]; - construct_mylev(nprocs); - for (int lev = 0; lev < levels; lev++) - { - MPI_Comm_split(MPI_COMM_WORLD, mylev, lev, &Commlev[lev]); - checkPatchList(PatL[lev], false); - Parallel::distribute(PatL[lev], end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); -#if (RPB == 1) -#error "not support yet" -#endif - } - /* note different comm field has its own rank index - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD,&myrank); - if(myrank==nprocs-1) - { - cout<<"myrank = "<= start_rank[lev] && myrank <= end_rank[lev]) - mylev = lev; - } -} -#elif (PSTR == 2) -void cgh::construct_mylev(int nprocs) -{ - if (nprocs < levels) - { - cout << "Too few procs to use parallel level methods!" 
<< endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - start_rank = new int[levels]; - end_rank = new int[levels]; - - int myrank; - - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - int mp; - mp = nprocs / levels; - - start_rank[levels - 1] = 0; - end_rank[levels - 1] = mp - 1; - for (int lev = levels - 2; lev > 0; lev--) - { - start_rank[lev] = end_rank[lev - 1] + 1; - end_rank[lev] = end_rank[lev - 1] + mp; - } - start_rank[0] = end_rank[1] + 1; - end_rank[0] = nprocs - 1; - - for (int lev = levels - 1; lev >= 0; lev--) - { - if (myrank >= start_rank[lev] && myrank <= end_rank[lev]) - mylev = lev; - } -} -#endif - -#elif (PSTR == 3) -void cgh::construct_mylev(int nprocs) -{ - if (nprocs <= 1) - { - cout << " cgh::construct_mylev requires at least 2 procs" << endl; - exit(0); - } - - start_rank = new int[2]; - end_rank = new int[2]; - - int myrank; - - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - int mp; - mp = nprocs / 2; - - // for other levels - for (int lev = 0; lev < levels - 1; lev++) - { - start_rank[lev] = 0; - end_rank[lev] = mp - 1; - } - // for finest level - start_rank[levels - 1] = end_rank[0] + 1; - end_rank[levels - 1] = nprocs - 1; - - if (myrank >= start_rank[0] && myrank <= end_rank[0]) - mylev = -1; // for other levels - else - mylev = 1; // for finest level -} - - -//----------------------------------------------------------------------- - - -void cgh::compose_cgh(int nprocs) -{ - Commlev = new MPI_Comm[levels]; - construct_mylev(nprocs); - - for (int lev = 0; lev < levels - 1; lev++) - { - MPI_Comm_split(MPI_COMM_WORLD, mylev, -1, &Commlev[lev]); - } - MPI_Comm_split(MPI_COMM_WORLD, mylev, 1, &Commlev[levels - 1]); - - for (int lev = 0; lev < levels; lev++) - { - checkPatchList(PatL[lev], false); - Parallel::distribute(PatL[lev], end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); -#if (RPB == 1) -#error "not support yet" -#endif - } -} -#endif - - -void cgh::sethandle(monitor *ErrorMonitor) -{ - int BH_num; - 
Porgls = new double **[levels]; - char filename[100]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(filename, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && ErrorMonitor && ErrorMonitor->outfile) - { - ErrorMonitor->outfile << "Can not open parameter file " << filename << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor && ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "BH_num") - BH_num = atoi(sval.c_str()); - else if (sgrp == "cgh" && skey == "moving levels start from") - { - movls = atoi(sval.c_str()); - movls = Mymin(movls, levels); - movls = Mymax(0, movls); - } - } - inf.close(); - } - for (int lev = 0; lev < levels; lev++) - { - Porgls[lev] = new double *[BH_num]; - for (int i = 0; i < BH_num; i++) - Porgls[lev][i] = new double[dim]; - } - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && ErrorMonitor && ErrorMonitor->outfile) - { - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, 
sind); - if (status == -1) - { - if (ErrorMonitor && ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && sind < BH_num) - { - if (skey == "Porgx") - { - for (int lev = 0; lev < levels; lev++) - Porgls[lev][sind][0] = atof(sval.c_str()); - } - else if (skey == "Porgy") - { - for (int lev = 0; lev < levels; lev++) - Porgls[lev][sind][1] = atof(sval.c_str()); - } - else if (skey == "Porgz") - { - for (int lev = 0; lev < levels; lev++) - Porgls[lev][sind][2] = atof(sval.c_str()); - } - } - } - inf.close(); - } - - for (int lev = 0; lev < movls; lev++) - for (int grd = 0; grd < grids[lev]; grd++) - for (int i = 0; i < dim; i++) - handle[lev][grd][i] = 0; - - if (movls < levels) - { - if (ErrorMonitor && ErrorMonitor->I_Print) - { - cout << endl; - cout << " moving levels are lev #" << movls << "--" << levels - 1 << endl; - cout << endl; - } - - for (int lev = movls; lev < levels; lev++) - for (int grd = 0; grd < grids[lev]; grd++) - { -#if 0 - int bht=0; - for(int bhi=0;bhi bbox[lev][grd][i+dim]) {flag=true; break;} - if(flag) continue; - bht++; - if(bht==1) for(int i=0;ioutfile) - { - ErrorMonitor->outfile<<"cgh::sethandle: lev#"< dis1) - { - bht = bhi; - dis0 = dis1; - } // chose nearest one - } - } - for (int i = 0; i < dim; i++) - handle[lev][grd][i] = Porgls[0][bht][i]; -#endif - } - } - else if (ErrorMonitor && ErrorMonitor->I_Print) - { - if (levels > 1) - cout << "fixed mesh refinement!" << endl; - else - cout << "unigrid simulation!" 
<< endl; - } - - BH_num_in = BH_num; -} -void cgh::checkPatchList(MyList *PatL, bool buflog) -{ - while (PatL) - { - PatL->data->checkPatch(buflog); - PatL = PatL->next; - } -} - - -//================================================================================================ - -// This member function moves the grid - -//================================================================================================ - -void cgh::Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, bool BB, - monitor *ErrorMonitor) -{ - // for moving part - if (movls < levels) - { - bool tot_flag = false; - bool *lev_flag; - double **tmpPorg; - tmpPorg = new double *[BH_num]; - for (int bhi = 0; bhi < BH_num; bhi++) - { - tmpPorg[bhi] = new double[dim]; - for (int i = 0; i < dim; i++) - tmpPorg[bhi][i] = Porgbr[bhi][i]; - } - lev_flag = new bool[levels - movls]; - for (int lev = movls; lev < levels; lev++) - { - lev_flag[lev - movls] = false; - for (int grd = 0; grd < grids[lev]; grd++) - { - int flag; - int do_every = 2; - double dX = PatL[lev]->data->blb->data->getdX(0); - double dY = PatL[lev]->data->blb->data->getdX(1); - double dZ = PatL[lev]->data->blb->data->getdX(2); - double rr; - // make sure that the grid corresponds to the black hole - int bhi = 0; - for (bhi = 0; bhi < BH_num; bhi++) - { - // because finner level may also change Porgbr, so we need factor 2 - if (feq(Porgbr[bhi][0], handle[lev][grd][0], 2 * do_every * dX) && - feq(Porgbr[bhi][1], handle[lev][grd][1], 2 * do_every * dY) && - feq(Porgbr[bhi][2], handle[lev][grd][2], 2 * do_every * dZ)) - break; - } - if (bhi == BH_num) - { - // if the box has already touched the original point - if (feq(0, bbox[lev][grd][0], dX / 2) && - feq(0, bbox[lev][grd][1], dY / 2) && - feq(0, bbox[lev][grd][2], dZ / 2)) - break; - - if (BH_num == 1) - { - bhi = 0; - break; - } // if only one black hole, it definitely match! 
- - if (ErrorMonitor->outfile) - { - ErrorMonitor->outfile << "cgh::Regrid: no black hole matches with grid lev#" << lev << " grd#" << grd - << " with handle (" << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << ")" << endl; - ErrorMonitor->outfile << "black holes' old positions:" << endl; - for (bhi = 0; bhi < BH_num; bhi++) - ErrorMonitor->outfile << "#" << bhi << ": (" << Porgbr[bhi][0] << "," << Porgbr[bhi][1] << "," << Porgbr[bhi][2] << ")" << endl; - ErrorMonitor->outfile << "tolerance:" << endl; - ErrorMonitor->outfile << "(" << 2 * do_every * dX << "," << 2 * do_every * dY << "," << 2 * do_every * dZ << ")" << endl; - ErrorMonitor->outfile << "box lower boundary: (" << bbox[lev][grd][0] << "," << bbox[lev][grd][1] << "," << bbox[lev][grd][2] << ")" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - delete[] lev_flag; - for (bhi = 0; bhi < BH_num; bhi++) - delete[] tmpPorg[bhi]; - delete[] tmpPorg; - return; - } - // x direction - rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][0] + flag * dX; - // pay attention to the symmetric case - if (Symmetry == 2 && rr < 0) - rr = -bbox[lev][grd][0]; - else - rr = flag * dX; - - if (fabs(rr) > dX / 2) - { - lev_flag[lev - movls] = tot_flag = true; - bbox[lev][grd][0] = bbox[lev][grd][0] + rr; - bbox[lev][grd][3] = bbox[lev][grd][3] + rr; - handle[lev][grd][0] += rr; - tmpPorg[bhi][0] = Porg0[bhi][0]; - } - - // y direction - rr = (Porg0[bhi][1] - handle[lev][grd][1]) / dY; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][1] + flag * dY; - // pay attention to the symmetric case - if (Symmetry == 2 && rr < 0) - rr = -bbox[lev][grd][1]; - else - rr = flag * dY; - - if (fabs(rr) > dY / 2) - { - lev_flag[lev - movls] = tot_flag = true; - bbox[lev][grd][1] = 
bbox[lev][grd][1] + rr; - bbox[lev][grd][4] = bbox[lev][grd][4] + rr; - handle[lev][grd][1] += rr; - tmpPorg[bhi][1] = Porg0[bhi][1]; - } - - // z direction - rr = (Porg0[bhi][2] - handle[lev][grd][2]) / dZ; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][2] + flag * dZ; - // pay attention to the symmetric case - if (Symmetry > 0 && rr < 0) - rr = -bbox[lev][grd][1]; - else - rr = flag * dZ; - - if (fabs(rr) > dZ / 2) - { - lev_flag[lev - movls] = tot_flag = true; - bbox[lev][grd][2] = bbox[lev][grd][2] + rr; - bbox[lev][grd][5] = bbox[lev][grd][5] + rr; - handle[lev][grd][2] += rr; - tmpPorg[bhi][2] = Porg0[bhi][2]; - } - } - // if(ErrorMonitor->outfile && lev_flag[lev-movls]) cout<<"lev#"< *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, bool BB, - monitor *ErrorMonitor) -{ - // for moving part - if (movls < levels) - { - bool tot_flag = false; - bool *lev_flag; - double **tmpPorg; - tmpPorg = new double *[BH_num]; - for (int bhi = 0; bhi < BH_num; bhi++) - { - tmpPorg[bhi] = new double[dim]; - for (int i = 0; i < dim; i++) - tmpPorg[bhi][i] = Porgbr[bhi][i]; - } - lev_flag = new bool[levels - movls]; - for (int lev = movls; lev < levels; lev++) - { - lev_flag[lev - movls] = false; - for (int grd = 0; grd < grids[lev]; grd++) - { - int flag; - int do_every = 2; - double dX = PatL[lev]->data->blb->data->getdX(0); - double dY = PatL[lev]->data->blb->data->getdX(1); - double dZ = PatL[lev]->data->blb->data->getdX(2); - double rr; - // make sure that the grid corresponds to the black hole - int bhi = 0; - for (bhi = 0; bhi < BH_num; bhi++) - { - // because finner level may also change Porgbr, so we need factor 2 - if (feq(Porgbr[bhi][0], handle[lev][grd][0], 2 * do_every * dX) && - feq(Porgbr[bhi][1], handle[lev][grd][1], 2 * do_every * dY) && - feq(Porgbr[bhi][2], handle[lev][grd][2], 2 * do_every * dZ)) - break; - } - if (bhi == BH_num) - { - // if the box 
has already touched the original point - if (feq(0, bbox[lev][grd][0], dX / 2) && - feq(0, bbox[lev][grd][1], dY / 2) && - feq(0, bbox[lev][grd][2], dZ / 2)) - break; - - if (BH_num == 1) - { - bhi = 0; - break; - } // if only one black hole, it definitely match! - - if (ErrorMonitor->outfile) - { - ErrorMonitor->outfile << "cgh::Regrid: no black hole matches with grid lev#" << lev << " grd#" << grd - << " with handle (" << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << ")" << endl; - ErrorMonitor->outfile << "black holes' old positions:" << endl; - for (bhi = 0; bhi < BH_num; bhi++) - ErrorMonitor->outfile << "#" << bhi << ": (" << Porgbr[bhi][0] << "," << Porgbr[bhi][1] << "," << Porgbr[bhi][2] << ")" << endl; - ErrorMonitor->outfile << "tolerance:" << endl; - ErrorMonitor->outfile << "(" << 2 * do_every * dX << "," << 2 * do_every * dY << "," << 2 * do_every * dZ << ")" << endl; - ErrorMonitor->outfile << "box lower boundary: (" << bbox[lev][grd][0] << "," << bbox[lev][grd][1] << "," << bbox[lev][grd][2] << ")" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - delete[] lev_flag; - for (bhi = 0; bhi < BH_num; bhi++) - delete[] tmpPorg[bhi]; - delete[] tmpPorg; - return; - } - // x direction - rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][0] + flag * dX; - // pay attention to the symmetric case - if (Symmetry == 2 && rr < 0) - rr = -bbox[lev][grd][0]; - else - rr = flag * dX; - - if (fabs(rr) > dX / 2) - { - lev_flag[lev - movls] = tot_flag = true; - bbox[lev][grd][0] = bbox[lev][grd][0] + rr; - bbox[lev][grd][3] = bbox[lev][grd][3] + rr; - handle[lev][grd][0] += rr; - tmpPorg[bhi][0] = Porg0[bhi][0]; - } - - // y direction - rr = (Porg0[bhi][1] - handle[lev][grd][1]) / dY; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - 
rr = bbox[lev][grd][1] + flag * dY; - // pay attention to the symmetric case - if (Symmetry == 2 && rr < 0) - rr = -bbox[lev][grd][1]; - else - rr = flag * dY; - - if (fabs(rr) > dY / 2) - { - lev_flag[lev - movls] = tot_flag = true; - bbox[lev][grd][1] = bbox[lev][grd][1] + rr; - bbox[lev][grd][4] = bbox[lev][grd][4] + rr; - handle[lev][grd][1] += rr; - tmpPorg[bhi][1] = Porg0[bhi][1]; - } - - // z direction - rr = (Porg0[bhi][2] - handle[lev][grd][2]) / dZ; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][2] + flag * dZ; - // pay attention to the symmetric case - if (Symmetry > 0 && rr < 0) - rr = -bbox[lev][grd][1]; - else - rr = flag * dZ; - - if (fabs(rr) > dZ / 2) - { - lev_flag[lev - movls] = tot_flag = true; - bbox[lev][grd][2] = bbox[lev][grd][2] + rr; - bbox[lev][grd][5] = bbox[lev][grd][5] + rr; - handle[lev][grd][2] += rr; - tmpPorg[bhi][2] = Porg0[bhi][2]; - } - } - // if(ErrorMonitor->outfile && lev_flag[lev-movls]) cout<<"lev#"< *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, - int Symmetry, bool BB) -{ - for (int lev = movls; lev < levels; lev++) - if (lev_flag[lev - movls]) - { - MyList *tmPat = 0; - tmPat = construct_patchlist(lev, Symmetry); - // tmPat construction completes - Parallel::distribute(tmPat, nprocs, ingfs, fngfs, false); - // checkPatchList(tmPat,true); - bool CC = (lev > trfls); - Parallel::fill_level_data(tmPat, PatL[lev], PatL[lev - 1], OldList, StateList, FutureList, tmList, Symmetry, BB, CC); - - Parallel::KillBlocks(PatL[lev]); - PatL[lev]->destroyList(); - PatL[lev] = tmPat; -#if (RPB == 1) - Parallel::destroypsuList_bam(bdsul[lev]); - Parallel::destroypsuList_bam(rsul[lev]); - Parallel::Constr_pointstr_OutBdLow2Hi(PatL[lev], PatL[lev - 1], bdsul[lev]); - Parallel::Constr_pointstr_Restrict(PatL[lev], PatL[lev - 1], rsul[lev]); -#endif - } -} -#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) -#warning "recompose_cgh is not 
implimented yet" -void cgh::recompose_cgh(int nprocs, bool *lev_flag, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, - int Symmetry, bool BB) -{ - for (int lev = movls; lev < levels; lev++) - if (lev_flag[lev - movls]) - { - MyList *tmPat = 0; - tmPat = construct_patchlist(lev, Symmetry); - // tmPat construction completes - Parallel::distribute(tmPat, end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); - // checkPatchList(tmPat,true); - bool CC = (lev > trfls); - Parallel::fill_level_data(tmPat, PatL[lev], PatL[lev - 1], OldList, StateList, FutureList, tmList, Symmetry, BB, CC); - - Parallel::KillBlocks(PatL[lev]); - PatL[lev]->destroyList(); - PatL[lev] = tmPat; -#if (RPB == 1) -#error "not support yet" -#endif - } -} - -//================================================================================================ - -void cgh::recompose_cgh_fake(int nprocs, bool *lev_flag, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, - int Symmetry, bool BB) -{ - for (int lev = movls; lev < levels; lev++) - if (lev_flag[lev - movls] && lev != mylev) - { - MyList *tmPat = 0; - tmPat = construct_patchlist(lev, Symmetry); - // tmPat construction completes - Parallel::distribute(tmPat, end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); - - Parallel::KillBlocks(PatL[lev]); - PatL[lev]->destroyList(); - PatL[lev] = tmPat; - } -} -#endif - -//================================================================================================ - -// This member function reads grid information from input files - -//================================================================================================ - -void cgh::read_bbox(int Symmetry, char *filename) -{ - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind1, sind2, sind3; - 
ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "cgh::cgh: Can not open parameter file " << filename << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind1); - if (status == -1) - { - cout << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "cgh" && skey == "levels") - { - levels = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - - grids = new int[levels]; - shape = new int **[levels]; - handle = new double **[levels]; - bbox = new double **[levels]; - PatL = new MyList *[levels]; - Lt = new double[levels]; -#if (RPB == 1) - bdsul = new MyList *[levels]; - rsul = new MyList *[levels]; -#endif - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind1, sind2, sind3; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "cgh::cgh: Can not open parameter file " << filename << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind1, sind2, sind3); - if (status == -1) - { - cout << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "cgh" && skey == "grids" && sind1 < levels) - grids[sind1] = atoi(sval.c_str()); - } - inf.close(); - } - - for (int sind1 = 0; sind1 < levels; sind1++) - { - shape[sind1] = new int *[grids[sind1]]; - handle[sind1] = new double *[grids[sind1]]; - bbox[sind1] = new double *[grids[sind1]]; - for (int sind2 = 0; sind2 < grids[sind1]; 
sind2++) - { - shape[sind1][sind2] = new int[dim]; - handle[sind1][sind2] = new double[dim]; - bbox[sind1][sind2] = new double[2 * dim]; - } - } - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind1, sind2, sind3; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "cgh::cgh: Can not open parameter file " << filename << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind1, sind2, sind3); - - if (status == -1) - { - cout << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "cgh" && sind1 < levels && sind2 < grids[sind1]) - { - if (skey == "bbox") - bbox[sind1][sind2][sind3] = atof(sval.c_str()); - else if (skey == "shape") - shape[sind1][sind2][sind3] = atoi(sval.c_str()); - } - } - inf.close(); - } -// we always assume the input parameter is in cell center style -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - for (int lev = 0; lev < levels; lev++) - for (int grd = 0; grd < grids[lev]; grd++) - { - for (int i = 0; i < dim; i++) - { - - shape[lev][grd][i] = shape[lev][grd][i] + 1; - } - } -#endif - - { - - // boxes align check - double DH0[dim]; - for (int i = 0; i < dim; i++) -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - DH0[i] = (bbox[0][0][i + dim] - bbox[0][0][i]) / (shape[0][0][i] - 1); -#else -#ifdef Cell - DH0[i] = (bbox[0][0][i + dim] - bbox[0][0][i]) / shape[0][0][i]; -#else -#error Not define Vertex nor Cell -#endif -#endif - for (int lev = 0; lev < levels; lev++) - for (int grd = 0; grd < grids[lev]; grd++) - Parallel::aligncheck(bbox[0][0], bbox[lev][grd], lev, DH0, shape[lev][grd]); - -#if 0 // we do not 
need it here, because we do it in construct_patchlist -// extend buffer points for shell overlap -#ifdef WithShell - for(int i=0;i *cgh::construct_patchlist(int lev, int Symmetry) -{ - // Construct Patches - MyList *tmPat = 0; - // construct box list - MyList *boxes = 0, *gs; - - /* - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - if (myrank == 0) - { - cout << " construct patchlist: " << " level = " << lev << ", grids in this level = " << grids[lev] << endl; - } - */ - - for (int grd = 0; grd < grids[lev]; grd++) - { - if (boxes) - { - gs->next = new MyList; - gs = gs->next; - gs->data = new Parallel::gridseg; - } - else - { - boxes = gs = new MyList; - gs->data = new Parallel::gridseg; - } - for (int i = 0; i < dim; i++) - { - gs->data->llb[i] = bbox[lev][grd][i]; - gs->data->uub[i] = bbox[lev][grd][dim + i]; - gs->data->shape[i] = shape[lev][grd][i]; - } - gs->data->Bg = 0; - gs->next = 0; - } - - // Merge grid boxes (merging more than three boxes may cause bugs) - // Parallel::merge_gsl(boxes, ratio); - if (grids[lev] < 3) - { - Parallel::merge_gsl(boxes, ratio); - } - - // When grid boxes overlap, re-split the boxes - // Parallel::cut_gsl(boxes); - if (grids[lev] < 3) - { - Parallel::cut_gsl(boxes); - } - - // After splitting, add new ghost regions? 
- // Parallel::add_ghost_touch(boxes); - if (grids[lev] < 3) - { - Parallel::add_ghost_touch(boxes); - } - - MyList *gp; - gs = boxes; - while (gs) - { - double tbb[2 * dim]; - if (tmPat) - { - gp->next = new MyList; - gp = gp->next; - for (int i = 0; i < dim; i++) - { - tbb[i] = gs->data->llb[i]; - tbb[dim + i] = gs->data->uub[i]; - } -#ifdef WithShell - gp->data = new Patch(3, gs->data->shape, tbb, lev, true, Symmetry); -#else - gp->data = new Patch(3, gs->data->shape, tbb, lev, (lev > 0), Symmetry); -#endif - } - else - { - tmPat = gp = new MyList; - for (int i = 0; i < dim; i++) - { - tbb[i] = gs->data->llb[i]; - tbb[dim + i] = gs->data->uub[i]; - } -#ifdef WithShell - gp->data = new Patch(3, gs->data->shape, tbb, lev, true, Symmetry); -#else - gp->data = new Patch(3, gs->data->shape, tbb, lev, (lev > 0), Symmetry); -#endif - } - gp->next = 0; - - gs = gs->next; - } - - boxes->destroyList(); - - return tmPat; -} - -//================================================================================================ - - -bool cgh::Interp_One_Point(MyList *VarList, - double *XX, /*input global Cartesian coordinate*/ - double *Shellf, int Symmetry) -{ - int lev = levels - 1; - while (lev >= 0) - { - MyList *Pp = PatL[lev]; - while (Pp) - { -#if (PSTR == 0) - if (Pp->data->Interp_ONE_Point(VarList, XX, Shellf, Symmetry)) - return true; -#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) - if (Pp->data->Interp_ONE_Point(VarList, XX, Shellf, Symmetry, Commlev[lev])) - return true; -#endif - Pp = Pp->next; - } - lev--; - } - return false; -} - - -void cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, bool BB, - monitor *ErrorMonitor) -{ - if (lev < movls) - return; - -#if (0) - // #if (PSTR == 1 || PSTR == 2) - MyList *Pp = PatL[lev]; - while (Pp) - { - Pp->data->checkPatch(0, start_rank[mylev]); - Pp = Pp->next; - } - int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, 
&myrank); - if (myrank == start_rank[mylev]) - { - cout << "out_rank = " << myrank << endl; - for (int grd = 0; grd < grids[lev]; grd++) - { - cout << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << endl; - } - for (int bhi = 0; bhi < BH_num; bhi++) - { - cout << Porgls[lev][bhi][0] << "," << Porgls[lev][bhi][1] << "," << Porgls[lev][bhi][2] << endl; - cout << Porg0[bhi][0] << "," << Porg0[bhi][1] << "," << Porg0[bhi][2] << endl; - } - } -#endif - - // misc::tillherecheck(Commlev[lev],start_rank[lev],"start Regrid_Onelevel"); - // for moving part - bool tot_flag = false; - double **tmpPorg; - tmpPorg = new double *[BH_num]; - for (int bhi = 0; bhi < BH_num; bhi++) - { - tmpPorg[bhi] = new double[dim]; - for (int i = 0; i < dim; i++) - tmpPorg[bhi][i] = Porgls[lev][bhi][i]; - } - - for (int grd = 0; grd < grids[lev]; grd++) - { - int flag; - int do_every = 2; - double dX = PatL[lev]->data->blb->data->getdX(0); - double dY = PatL[lev]->data->blb->data->getdX(1); - double dZ = PatL[lev]->data->blb->data->getdX(2); - double rr; - // make sure that the grid corresponds to the black hole - int bhi = 0; - for (bhi = 0; bhi < BH_num; bhi++) - { - // because finner level may also change Porgbr, so we need factor 2 - // now I used Porgls - if (feq(Porgls[lev][bhi][0], handle[lev][grd][0], 2 * do_every * dX) && - feq(Porgls[lev][bhi][1], handle[lev][grd][1], 2 * do_every * dY) && - feq(Porgls[lev][bhi][2], handle[lev][grd][2], 2 * do_every * dZ)) - break; - } - if (bhi == BH_num) - { - // if the box has already touched the original point - if (feq(0, bbox[lev][grd][0], dX / 2) && - feq(0, bbox[lev][grd][1], dY / 2) && - feq(0, bbox[lev][grd][2], dZ / 2)) - break; - - if (BH_num == 1) - { - bhi = 0; - break; - } // if only one black hole, it definitely match! 
- - if (ErrorMonitor->outfile) - { - ErrorMonitor->outfile << "cgh::Regrid: no black hole matches with grid lev#" << lev << " grd#" << grd - << " with handle (" << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << ")" << endl; - ErrorMonitor->outfile << "black holes' old positions:" << endl; - for (bhi = 0; bhi < BH_num; bhi++) - ErrorMonitor->outfile << "#" << bhi << ": (" << Porgls[lev][bhi][0] << "," << Porgls[lev][bhi][1] << "," - << Porgls[lev][bhi][2] << ")" << endl; - ErrorMonitor->outfile << "tolerance:" << endl; - ErrorMonitor->outfile << "(" << 2 * do_every * dX << "," << 2 * do_every * dY << "," << 2 * do_every * dZ << ")" << endl; - ErrorMonitor->outfile << "box lower boundary: (" << bbox[lev][grd][0] << "," << bbox[lev][grd][1] << "," << bbox[lev][grd][2] << ")" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (bhi = 0; bhi < BH_num; bhi++) - delete[] tmpPorg[bhi]; - delete[] tmpPorg; - return; - } - // x direction - rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][0] + flag * dX; - // pay attention to the symmetric case - if (Symmetry == 2 && rr < 0) - rr = -bbox[lev][grd][0]; - else - rr = flag * dX; - - if (fabs(rr) > dX / 2) - { - tot_flag = true; - bbox[lev][grd][0] = bbox[lev][grd][0] + rr; - bbox[lev][grd][3] = bbox[lev][grd][3] + rr; - handle[lev][grd][0] += rr; - tmpPorg[bhi][0] = Porg0[bhi][0]; - } - - // y direction - rr = (Porg0[bhi][1] - handle[lev][grd][1]) / dY; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][1] + flag * dY; - // pay attention to the symmetric case - if (Symmetry == 2 && rr < 0) - rr = -bbox[lev][grd][1]; - else - rr = flag * dY; - - if (fabs(rr) > dY / 2) - { - tot_flag = true; - bbox[lev][grd][1] = bbox[lev][grd][1] + rr; - bbox[lev][grd][4] = 
bbox[lev][grd][4] + rr; - handle[lev][grd][1] += rr; - tmpPorg[bhi][1] = Porg0[bhi][1]; - } - - // z direction - rr = (Porg0[bhi][2] - handle[lev][grd][2]) / dZ; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][2] + flag * dZ; - // pay attention to the symmetric case - if (Symmetry > 0 && rr < 0) - rr = -bbox[lev][grd][1]; - else - rr = flag * dZ; - - if (fabs(rr) > dZ / 2) - { - tot_flag = true; - bbox[lev][grd][2] = bbox[lev][grd][2] + rr; - bbox[lev][grd][5] = bbox[lev][grd][5] + rr; - handle[lev][grd][2] += rr; - tmpPorg[bhi][2] = Porg0[bhi][2]; - } - } - - // misc::tillherecheck(Commlev[lev],start_rank[lev],"after tot_flag check"); - - if (tot_flag) - { - int nprocs; - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - - // misc::tillherecheck(Commlev[lev],start_rank[lev],"before recompose_cgh_Onelevel"); - - recompose_cgh_Onelevel(nprocs, lev, OldList, StateList, FutureList, tmList, Symmetry, BB); - - // misc::tillherecheck(Commlev[lev],start_rank[lev],"after recompose_cgh_Onelevel"); - - for (int bhi = 0; bhi < BH_num; bhi++) - { - for (int i = 0; i < dim; i++) - Porgls[lev][bhi][i] = tmpPorg[bhi][i]; - } - -#if (PSTR == 1 || PSTR == 2) -// MyList *Pp=PatL[lev]; -// while(Pp) -// { -// Pp->data->checkPatch(0,start_rank[mylev]); -// Pp=Pp->next; -// } -#endif - } - - for (int bhi = 0; bhi < BH_num; bhi++) - delete[] tmpPorg[bhi]; - delete[] tmpPorg; -} - - -#if (PSTR == 0) -void cgh::recompose_cgh_Onelevel(int nprocs, int lev, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, - int Symmetry, bool BB) -{ - MyList *tmPat = 0; - tmPat = construct_patchlist(lev, Symmetry); - // tmPat construction completes - Parallel::distribute(tmPat, nprocs, ingfs, fngfs, false); - // checkPatchList(tmPat,true); - bool CC = (lev > trfls); - Parallel::fill_level_data(tmPat, PatL[lev], PatL[lev - 1], OldList, StateList, FutureList, tmList, Symmetry, BB, CC); - - 
Parallel::KillBlocks(PatL[lev]); - PatL[lev]->destroyList(); - PatL[lev] = tmPat; -} -#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) -#warning "recompose_cgh_Onelevel is not implimented yet" -void cgh::recompose_cgh_Onelevel(int nprocs, int lev, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, - int Symmetry, bool BB) -{ - MyList *tmPat = 0; - misc::tillherecheck(Commlev[lev], start_rank[lev], "before construct_patchlist"); - tmPat = construct_patchlist(lev, Symmetry); - misc::tillherecheck(Commlev[lev], start_rank[lev], "after construct_patchlist"); - // tmPat construction completes - Parallel::distribute(tmPat, end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); - misc::tillherecheck(Commlev[lev], start_rank[lev], "after distribute"); - // checkPatchList(tmPat,true); - bool CC = (lev > trfls); - Parallel::fill_level_data(tmPat, PatL[lev], PatL[lev - 1], OldList, StateList, FutureList, tmList, Symmetry, BB, CC); - misc::tillherecheck(Commlev[lev], start_rank[lev], "after fill_level_data"); - - Parallel::KillBlocks(PatL[lev]); - PatL[lev]->destroyList(); - PatL[lev] = tmPat; -} - - -// the input lev is lower level for regrid -void cgh::Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, bool BB, - monitor *ErrorMonitor) -{ - lev++; - if (lev < movls) - return; - - // for moving part - bool tot_flag = false; - double **tmpPorg; - tmpPorg = new double *[BH_num]; - for (int bhi = 0; bhi < BH_num; bhi++) - { - tmpPorg[bhi] = new double[dim]; - for (int i = 0; i < dim; i++) - tmpPorg[bhi][i] = Porgbr[bhi][i]; - } - - for (int grd = 0; grd < grids[lev]; grd++) - { - int flag; - int do_every = 2; - double dX = PatL[lev]->data->blb->data->getdX(0); - double dY = PatL[lev]->data->blb->data->getdX(1); - double dZ = PatL[lev]->data->blb->data->getdX(2); - double rr; - // make sure that the grid 
corresponds to the black hole - int bhi = 0; - for (bhi = 0; bhi < BH_num; bhi++) - { - // because finner level may also change Porgbr, so we need factor 2 - if (feq(Porgbr[bhi][0], handle[lev][grd][0], 2 * do_every * dX) && - feq(Porgbr[bhi][1], handle[lev][grd][1], 2 * do_every * dY) && - feq(Porgbr[bhi][2], handle[lev][grd][2], 2 * do_every * dZ)) - break; - } - if (bhi == BH_num) - { - // if the box has already touched the original point - if (feq(0, bbox[lev][grd][0], dX / 2) && - feq(0, bbox[lev][grd][1], dY / 2) && - feq(0, bbox[lev][grd][2], dZ / 2)) - break; - - if (BH_num == 1) - { - bhi = 0; - break; - } // if only one black hole, it definitely match! - - if (ErrorMonitor->outfile) - { - ErrorMonitor->outfile << "cgh::Regrid: no black hole matches with grid lev#" << lev << " grd#" << grd - << " with handle (" << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << ")" << endl; - ErrorMonitor->outfile << "black holes' old positions:" << endl; - for (bhi = 0; bhi < BH_num; bhi++) - ErrorMonitor->outfile << "#" << bhi << ": (" << Porgbr[bhi][0] << "," << Porgbr[bhi][1] << "," << Porgbr[bhi][2] << ")" << endl; - ErrorMonitor->outfile << "tolerance:" << endl; - ErrorMonitor->outfile << "(" << 2 * do_every * dX << "," << 2 * do_every * dY << "," << 2 * do_every * dZ << ")" << endl; - ErrorMonitor->outfile << "box lower boundary: (" << bbox[lev][grd][0] << "," << bbox[lev][grd][1] << "," << bbox[lev][grd][2] << ")" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (bhi = 0; bhi < BH_num; bhi++) - delete[] tmpPorg[bhi]; - delete[] tmpPorg; - return; - } - // x direction - rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][0] + flag * dX; - // pay attention to the symmetric case - if (Symmetry == 2 && rr < 0) - rr = -bbox[lev][grd][0]; - else - rr = flag * dX; - - if (fabs(rr) > dX / 2) - { - 
tot_flag = true; - bbox[lev][grd][0] = bbox[lev][grd][0] + rr; - bbox[lev][grd][3] = bbox[lev][grd][3] + rr; - handle[lev][grd][0] += rr; - tmpPorg[bhi][0] = Porg0[bhi][0]; - } - - // y direction - rr = (Porg0[bhi][1] - handle[lev][grd][1]) / dY; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][1] + flag * dY; - // pay attention to the symmetric case - if (Symmetry == 2 && rr < 0) - rr = -bbox[lev][grd][1]; - else - rr = flag * dY; - - if (fabs(rr) > dY / 2) - { - tot_flag = true; - bbox[lev][grd][1] = bbox[lev][grd][1] + rr; - bbox[lev][grd][4] = bbox[lev][grd][4] + rr; - handle[lev][grd][1] += rr; - tmpPorg[bhi][1] = Porg0[bhi][1]; - } - - // z direction - rr = (Porg0[bhi][2] - handle[lev][grd][2]) / dZ; - if (rr > 0) - flag = int(rr + 0.5) / do_every; - else - flag = int(rr - 0.5) / do_every; - flag = flag * do_every; - rr = bbox[lev][grd][2] + flag * dZ; - // pay attention to the symmetric case - if (Symmetry > 0 && rr < 0) - rr = -bbox[lev][grd][1]; - else - rr = flag * dZ; - - if (fabs(rr) > dZ / 2) - { - tot_flag = true; - bbox[lev][grd][2] = bbox[lev][grd][2] + rr; - bbox[lev][grd][5] = bbox[lev][grd][5] + rr; - handle[lev][grd][2] += rr; - tmpPorg[bhi][2] = Porg0[bhi][2]; - } - } - - if (tot_flag) - { - int nprocs; - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - recompose_cgh_Onelevel(nprocs, lev, OldList, StateList, FutureList, tmList, Symmetry, BB); - } - - for (int bhi = 0; bhi < BH_num; bhi++) - delete[] tmpPorg[bhi]; - delete[] tmpPorg; -} -#endif - - -void cgh::settrfls(const int lev) -{ - trfls = lev; -} + +#ifdef newc +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; +#else +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +#include + +#include "macrodef.h" +#include "misc.h" +#include "cgh.h" +#include "Parallel.h" +#include "parameters.h" + 
+//================================================================================================ + +// define cgh class + +//================================================================================================ + +cgh::cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, + monitor *ErrorMonitor) : ingfs(ingfsi), fngfs(fngfsi), trfls(0) +{ +#if (PSTR == 1 || PSTR == 2 || PSTR == 3) + Commlev = 0; + start_rank = 0; + end_rank = 0; +#endif + + // Initialize load balancing variables + enable_load_balance = false; + load_balance_check_interval = 10; // Check every 10 time steps + current_time_step = 0; + rank_interp_times = nullptr; + heavy_ranks = nullptr; + num_heavy_ranks = 0; + + if (!checkrun) + { + read_bbox(Symmetry, filename); + sethandle(ErrorMonitor); + for (int lev = 0; lev < levels; lev++) + PatL[lev] = construct_patchlist(lev, Symmetry); + } +} + +//================================================================================================ + + + +//================================================================================================ + +// This member function is the destructor; it releases allocated resources and deletes variables + +//================================================================================================ + +cgh::~cgh() +{ + for (int lev = 0; lev < levels; lev++) + { + for (int grd = 0; grd < grids[lev]; grd++) + { + delete[] bbox[lev][grd]; + delete[] shape[lev][grd]; + delete[] handle[lev][grd]; + } + delete[] bbox[lev]; + delete[] shape[lev]; + delete[] handle[lev]; + Parallel::KillBlocks(PatL[lev]); + PatL[lev]->destroyList(); +#if (RPB == 1) + Parallel::destroypsuList_bam(bdsul[lev]); + Parallel::destroypsuList_bam(rsul[lev]); +#endif + } + delete[] grids; + delete[] Lt; + delete[] bbox; + delete[] shape; + delete[] handle; + delete[] PatL; +#if (RPB == 1) + delete[] bdsul; + delete[] rsul; +#endif + +#if (PSTR == 1 || PSTR == 2 || PSTR == 3) + for (int lev = 0; lev < levels; 
lev++) + { + MPI_Comm_free(&Commlev[lev]); + } + + if (Commlev) + delete[] Commlev; + if (start_rank) + delete[] start_rank; + if (end_rank) + delete[] end_rank; +#endif + for (int lev = 0; lev < levels; lev++) + { + for (int ibh = 0; ibh < BH_num_in; ibh++) + delete[] Porgls[lev][ibh]; + delete[] Porgls[lev]; + } + delete[] Porgls; + + // Clean up load balancing memory + if (rank_interp_times) + delete[] rank_interp_times; + if (heavy_ranks) + delete[] heavy_ranks; +} + +//================================================================================================ + + +//================================================================================================ + +// This member function constructs the computational grid + +//================================================================================================ + +#if (PSTR == 0) +void cgh::compose_cgh(int nprocs) +{ + for (int lev = 0; lev < levels; lev++) + { + checkPatchList(PatL[lev], false); + Parallel::distribute_hard(PatL[lev], nprocs, ingfs, fngfs, false); +#if (RPB == 1) + // we need distributed box of PatL[lev] and PatL[lev-1] + if (lev > 0) + { + Parallel::Constr_pointstr_OutBdLow2Hi(PatL[lev], PatL[lev - 1], bdsul[lev]); + Parallel::Constr_pointstr_Restrict(PatL[lev], PatL[lev - 1], rsul[lev]); + } + else + { + bdsul[lev] = 0; + rsul[lev] = 0; + } +#endif + } +} + +//================================================================================================ + + +//================================================================================================ + +// This member function constructs the computational grid +// For the cases PSTR == 1 and PSTR == 2 + +//================================================================================================ + +#elif (PSTR == 1 || PSTR == 2) +void cgh::compose_cgh(int nprocs) +{ + Commlev = new MPI_Comm[levels]; + construct_mylev(nprocs); + for (int lev = 0; lev < levels; lev++) + { + MPI_Comm_split(MPI_COMM_WORLD, mylev, lev, 
&Commlev[lev]); + checkPatchList(PatL[lev], false); + Parallel::distribute(PatL[lev], end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); +#if (RPB == 1) +#error "not support yet" +#endif + } + /* note different comm field has its own rank index + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + if(myrank==nprocs-1) + { + cout<<"myrank = "<= start_rank[lev] && myrank <= end_rank[lev]) + mylev = lev; + } +} +#elif (PSTR == 2) +void cgh::construct_mylev(int nprocs) +{ + if (nprocs < levels) + { + cout << "Too few procs to use parallel level methods!" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + start_rank = new int[levels]; + end_rank = new int[levels]; + + int myrank; + + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int mp; + mp = nprocs / levels; + + start_rank[levels - 1] = 0; + end_rank[levels - 1] = mp - 1; + for (int lev = levels - 2; lev > 0; lev--) + { + start_rank[lev] = end_rank[lev - 1] + 1; + end_rank[lev] = end_rank[lev - 1] + mp; + } + start_rank[0] = end_rank[1] + 1; + end_rank[0] = nprocs - 1; + + for (int lev = levels - 1; lev >= 0; lev--) + { + if (myrank >= start_rank[lev] && myrank <= end_rank[lev]) + mylev = lev; + } +} +#endif + +#elif (PSTR == 3) +void cgh::construct_mylev(int nprocs) +{ + if (nprocs <= 1) + { + cout << " cgh::construct_mylev requires at least 2 procs" << endl; + exit(0); + } + + start_rank = new int[2]; + end_rank = new int[2]; + + int myrank; + + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + int mp; + mp = nprocs / 2; + + // for other levels + for (int lev = 0; lev < levels - 1; lev++) + { + start_rank[lev] = 0; + end_rank[lev] = mp - 1; + } + // for finest level + start_rank[levels - 1] = end_rank[0] + 1; + end_rank[levels - 1] = nprocs - 1; + + if (myrank >= start_rank[0] && myrank <= end_rank[0]) + mylev = -1; // for other levels + else + mylev = 1; // for finest level +} + + +//----------------------------------------------------------------------- + + +void 
cgh::compose_cgh(int nprocs) +{ + Commlev = new MPI_Comm[levels]; + construct_mylev(nprocs); + + for (int lev = 0; lev < levels - 1; lev++) + { + MPI_Comm_split(MPI_COMM_WORLD, mylev, -1, &Commlev[lev]); + } + MPI_Comm_split(MPI_COMM_WORLD, mylev, 1, &Commlev[levels - 1]); + + for (int lev = 0; lev < levels; lev++) + { + checkPatchList(PatL[lev], false); + Parallel::distribute(PatL[lev], end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); +#if (RPB == 1) +#error "not support yet" +#endif + } +} +#endif + + +void cgh::sethandle(monitor *ErrorMonitor) +{ + int BH_num; + Porgls = new double **[levels]; + char filename[100]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(filename, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && ErrorMonitor && ErrorMonitor->outfile) + { + ErrorMonitor->outfile << "Can not open parameter file " << filename << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor && ErrorMonitor->outfile) + ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && skey == "BH_num") + BH_num = atoi(sval.c_str()); + else if (sgrp == "cgh" && skey == "moving levels start from") + { + movls = atoi(sval.c_str()); + movls = Mymin(movls, levels); + movls = Mymax(0, movls); + } + } + inf.close(); + } + for (int lev = 0; lev < levels; lev++) + { + Porgls[lev] = new double 
*[BH_num]; + for (int i = 0; i < BH_num; i++) + Porgls[lev][i] = new double[dim]; + } + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + ifstream inf(filename, ifstream::in); + if (!inf.good() && ErrorMonitor && ErrorMonitor->outfile) + { + ErrorMonitor->outfile << "Can not open parameter file " << filename + << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + if (ErrorMonitor && ErrorMonitor->outfile) + ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "BSSN" && sind < BH_num) + { + if (skey == "Porgx") + { + for (int lev = 0; lev < levels; lev++) + Porgls[lev][sind][0] = atof(sval.c_str()); + } + else if (skey == "Porgy") + { + for (int lev = 0; lev < levels; lev++) + Porgls[lev][sind][1] = atof(sval.c_str()); + } + else if (skey == "Porgz") + { + for (int lev = 0; lev < levels; lev++) + Porgls[lev][sind][2] = atof(sval.c_str()); + } + } + } + inf.close(); + } + + for (int lev = 0; lev < movls; lev++) + for (int grd = 0; grd < grids[lev]; grd++) + for (int i = 0; i < dim; i++) + handle[lev][grd][i] = 0; + + if (movls < levels) + { + if (ErrorMonitor && ErrorMonitor->I_Print) + { + cout << endl; + cout << " moving levels are lev #" << movls << "--" << levels - 1 << endl; + cout << endl; + } + + for (int lev = movls; lev < levels; lev++) + for (int grd = 0; grd < grids[lev]; grd++) + { +#if 0 + int bht=0; + for(int bhi=0;bhi bbox[lev][grd][i+dim]) {flag=true; break;} + if(flag) continue; + bht++; + if(bht==1) for(int i=0;ioutfile) + { + ErrorMonitor->outfile<<"cgh::sethandle: lev#"< dis1) + { + bht = bhi; + dis0 = dis1; + } // chose nearest one + } + } 
+ for (int i = 0; i < dim; i++) + handle[lev][grd][i] = Porgls[0][bht][i]; +#endif + } + } + else if (ErrorMonitor && ErrorMonitor->I_Print) + { + if (levels > 1) + cout << "fixed mesh refinement!" << endl; + else + cout << "unigrid simulation!" << endl; + } + + BH_num_in = BH_num; +} +void cgh::checkPatchList(MyList *PatL, bool buflog) +{ + while (PatL) + { + PatL->data->checkPatch(buflog); + PatL = PatL->next; + } +} + + +//================================================================================================ + +// This member function moves the grid + +//================================================================================================ + +void cgh::Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, bool BB, + monitor *ErrorMonitor) +{ + // for moving part + if (movls < levels) + { + bool tot_flag = false; + bool *lev_flag; + double **tmpPorg; + tmpPorg = new double *[BH_num]; + for (int bhi = 0; bhi < BH_num; bhi++) + { + tmpPorg[bhi] = new double[dim]; + for (int i = 0; i < dim; i++) + tmpPorg[bhi][i] = Porgbr[bhi][i]; + } + lev_flag = new bool[levels - movls]; + for (int lev = movls; lev < levels; lev++) + { + lev_flag[lev - movls] = false; + for (int grd = 0; grd < grids[lev]; grd++) + { + int flag; + int do_every = 2; + double dX = PatL[lev]->data->blb->data->getdX(0); + double dY = PatL[lev]->data->blb->data->getdX(1); + double dZ = PatL[lev]->data->blb->data->getdX(2); + double rr; + // make sure that the grid corresponds to the black hole + int bhi = 0; + for (bhi = 0; bhi < BH_num; bhi++) + { + // because finner level may also change Porgbr, so we need factor 2 + if (feq(Porgbr[bhi][0], handle[lev][grd][0], 2 * do_every * dX) && + feq(Porgbr[bhi][1], handle[lev][grd][1], 2 * do_every * dY) && + feq(Porgbr[bhi][2], handle[lev][grd][2], 2 * do_every * dZ)) + break; + } + if (bhi == BH_num) + { + // if the box has already touched the original 
point + if (feq(0, bbox[lev][grd][0], dX / 2) && + feq(0, bbox[lev][grd][1], dY / 2) && + feq(0, bbox[lev][grd][2], dZ / 2)) + break; + + if (BH_num == 1) + { + bhi = 0; + break; + } // if only one black hole, it definitely match! + + if (ErrorMonitor->outfile) + { + ErrorMonitor->outfile << "cgh::Regrid: no black hole matches with grid lev#" << lev << " grd#" << grd + << " with handle (" << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << ")" << endl; + ErrorMonitor->outfile << "black holes' old positions:" << endl; + for (bhi = 0; bhi < BH_num; bhi++) + ErrorMonitor->outfile << "#" << bhi << ": (" << Porgbr[bhi][0] << "," << Porgbr[bhi][1] << "," << Porgbr[bhi][2] << ")" << endl; + ErrorMonitor->outfile << "tolerance:" << endl; + ErrorMonitor->outfile << "(" << 2 * do_every * dX << "," << 2 * do_every * dY << "," << 2 * do_every * dZ << ")" << endl; + ErrorMonitor->outfile << "box lower boundary: (" << bbox[lev][grd][0] << "," << bbox[lev][grd][1] << "," << bbox[lev][grd][2] << ")" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + delete[] lev_flag; + for (bhi = 0; bhi < BH_num; bhi++) + delete[] tmpPorg[bhi]; + delete[] tmpPorg; + return; + } + // x direction + rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][0] + flag * dX; + // pay attention to the symmetric case + if (Symmetry == 2 && rr < 0) + rr = -bbox[lev][grd][0]; + else + rr = flag * dX; + + if (fabs(rr) > dX / 2) + { + lev_flag[lev - movls] = tot_flag = true; + bbox[lev][grd][0] = bbox[lev][grd][0] + rr; + bbox[lev][grd][3] = bbox[lev][grd][3] + rr; + handle[lev][grd][0] += rr; + tmpPorg[bhi][0] = Porg0[bhi][0]; + } + + // y direction + rr = (Porg0[bhi][1] - handle[lev][grd][1]) / dY; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][1] + flag * 
dY; + // pay attention to the symmetric case + if (Symmetry == 2 && rr < 0) + rr = -bbox[lev][grd][1]; + else + rr = flag * dY; + + if (fabs(rr) > dY / 2) + { + lev_flag[lev - movls] = tot_flag = true; + bbox[lev][grd][1] = bbox[lev][grd][1] + rr; + bbox[lev][grd][4] = bbox[lev][grd][4] + rr; + handle[lev][grd][1] += rr; + tmpPorg[bhi][1] = Porg0[bhi][1]; + } + + // z direction + rr = (Porg0[bhi][2] - handle[lev][grd][2]) / dZ; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][2] + flag * dZ; + // pay attention to the symmetric case + if (Symmetry > 0 && rr < 0) + rr = -bbox[lev][grd][1]; + else + rr = flag * dZ; + + if (fabs(rr) > dZ / 2) + { + lev_flag[lev - movls] = tot_flag = true; + bbox[lev][grd][2] = bbox[lev][grd][2] + rr; + bbox[lev][grd][5] = bbox[lev][grd][5] + rr; + handle[lev][grd][2] += rr; + tmpPorg[bhi][2] = Porg0[bhi][2]; + } + } + // if(ErrorMonitor->outfile && lev_flag[lev-movls]) cout<<"lev#"< *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, bool BB, + monitor *ErrorMonitor) +{ + // for moving part + if (movls < levels) + { + bool tot_flag = false; + bool *lev_flag; + double **tmpPorg; + tmpPorg = new double *[BH_num]; + for (int bhi = 0; bhi < BH_num; bhi++) + { + tmpPorg[bhi] = new double[dim]; + for (int i = 0; i < dim; i++) + tmpPorg[bhi][i] = Porgbr[bhi][i]; + } + lev_flag = new bool[levels - movls]; + for (int lev = movls; lev < levels; lev++) + { + lev_flag[lev - movls] = false; + for (int grd = 0; grd < grids[lev]; grd++) + { + int flag; + int do_every = 2; + double dX = PatL[lev]->data->blb->data->getdX(0); + double dY = PatL[lev]->data->blb->data->getdX(1); + double dZ = PatL[lev]->data->blb->data->getdX(2); + double rr; + // make sure that the grid corresponds to the black hole + int bhi = 0; + for (bhi = 0; bhi < BH_num; bhi++) + { + // because finner level may also change Porgbr, so we need factor 2 + if (feq(Porgbr[bhi][0], 
handle[lev][grd][0], 2 * do_every * dX) && + feq(Porgbr[bhi][1], handle[lev][grd][1], 2 * do_every * dY) && + feq(Porgbr[bhi][2], handle[lev][grd][2], 2 * do_every * dZ)) + break; + } + if (bhi == BH_num) + { + // if the box has already touched the original point + if (feq(0, bbox[lev][grd][0], dX / 2) && + feq(0, bbox[lev][grd][1], dY / 2) && + feq(0, bbox[lev][grd][2], dZ / 2)) + break; + + if (BH_num == 1) + { + bhi = 0; + break; + } // if only one black hole, it definitely match! + + if (ErrorMonitor->outfile) + { + ErrorMonitor->outfile << "cgh::Regrid: no black hole matches with grid lev#" << lev << " grd#" << grd + << " with handle (" << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << ")" << endl; + ErrorMonitor->outfile << "black holes' old positions:" << endl; + for (bhi = 0; bhi < BH_num; bhi++) + ErrorMonitor->outfile << "#" << bhi << ": (" << Porgbr[bhi][0] << "," << Porgbr[bhi][1] << "," << Porgbr[bhi][2] << ")" << endl; + ErrorMonitor->outfile << "tolerance:" << endl; + ErrorMonitor->outfile << "(" << 2 * do_every * dX << "," << 2 * do_every * dY << "," << 2 * do_every * dZ << ")" << endl; + ErrorMonitor->outfile << "box lower boundary: (" << bbox[lev][grd][0] << "," << bbox[lev][grd][1] << "," << bbox[lev][grd][2] << ")" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + delete[] lev_flag; + for (bhi = 0; bhi < BH_num; bhi++) + delete[] tmpPorg[bhi]; + delete[] tmpPorg; + return; + } + // x direction + rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][0] + flag * dX; + // pay attention to the symmetric case + if (Symmetry == 2 && rr < 0) + rr = -bbox[lev][grd][0]; + else + rr = flag * dX; + + if (fabs(rr) > dX / 2) + { + lev_flag[lev - movls] = tot_flag = true; + bbox[lev][grd][0] = bbox[lev][grd][0] + rr; + bbox[lev][grd][3] = bbox[lev][grd][3] + rr; + handle[lev][grd][0] += rr; + 
tmpPorg[bhi][0] = Porg0[bhi][0]; + } + + // y direction + rr = (Porg0[bhi][1] - handle[lev][grd][1]) / dY; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][1] + flag * dY; + // pay attention to the symmetric case + if (Symmetry == 2 && rr < 0) + rr = -bbox[lev][grd][1]; + else + rr = flag * dY; + + if (fabs(rr) > dY / 2) + { + lev_flag[lev - movls] = tot_flag = true; + bbox[lev][grd][1] = bbox[lev][grd][1] + rr; + bbox[lev][grd][4] = bbox[lev][grd][4] + rr; + handle[lev][grd][1] += rr; + tmpPorg[bhi][1] = Porg0[bhi][1]; + } + + // z direction + rr = (Porg0[bhi][2] - handle[lev][grd][2]) / dZ; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][2] + flag * dZ; + // pay attention to the symmetric case + if (Symmetry > 0 && rr < 0) + rr = -bbox[lev][grd][1]; + else + rr = flag * dZ; + + if (fabs(rr) > dZ / 2) + { + lev_flag[lev - movls] = tot_flag = true; + bbox[lev][grd][2] = bbox[lev][grd][2] + rr; + bbox[lev][grd][5] = bbox[lev][grd][5] + rr; + handle[lev][grd][2] += rr; + tmpPorg[bhi][2] = Porg0[bhi][2]; + } + } + // if(ErrorMonitor->outfile && lev_flag[lev-movls]) cout<<"lev#"< *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB) +{ + for (int lev = movls; lev < levels; lev++) + if (lev_flag[lev - movls]) + { + MyList *tmPat = 0; + tmPat = construct_patchlist(lev, Symmetry); + // tmPat construction completes + Parallel::distribute(tmPat, nprocs, ingfs, fngfs, false); + // checkPatchList(tmPat,true); + bool CC = (lev > trfls); + Parallel::fill_level_data(tmPat, PatL[lev], PatL[lev - 1], OldList, StateList, FutureList, tmList, Symmetry, BB, CC); + + Parallel::KillBlocks(PatL[lev]); + PatL[lev]->destroyList(); + PatL[lev] = tmPat; +#if (RPB == 1) + Parallel::destroypsuList_bam(bdsul[lev]); + Parallel::destroypsuList_bam(rsul[lev]); + 
Parallel::Constr_pointstr_OutBdLow2Hi(PatL[lev], PatL[lev - 1], bdsul[lev]); + Parallel::Constr_pointstr_Restrict(PatL[lev], PatL[lev - 1], rsul[lev]); +#endif + } +} +#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) +#warning "recompose_cgh is not implimented yet" +void cgh::recompose_cgh(int nprocs, bool *lev_flag, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB) +{ + for (int lev = movls; lev < levels; lev++) + if (lev_flag[lev - movls]) + { + MyList *tmPat = 0; + tmPat = construct_patchlist(lev, Symmetry); + // tmPat construction completes + Parallel::distribute(tmPat, end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); + // checkPatchList(tmPat,true); + bool CC = (lev > trfls); + Parallel::fill_level_data(tmPat, PatL[lev], PatL[lev - 1], OldList, StateList, FutureList, tmList, Symmetry, BB, CC); + + Parallel::KillBlocks(PatL[lev]); + PatL[lev]->destroyList(); + PatL[lev] = tmPat; +#if (RPB == 1) +#error "not support yet" +#endif + } +} + +//================================================================================================ + +void cgh::recompose_cgh_fake(int nprocs, bool *lev_flag, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB) +{ + for (int lev = movls; lev < levels; lev++) + if (lev_flag[lev - movls] && lev != mylev) + { + MyList *tmPat = 0; + tmPat = construct_patchlist(lev, Symmetry); + // tmPat construction completes + Parallel::distribute(tmPat, end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); + + Parallel::KillBlocks(PatL[lev]); + PatL[lev]->destroyList(); + PatL[lev] = tmPat; + } +} +#endif + +//================================================================================================ + +// This member function reads grid information from input files + +//================================================================================================ + 
+void cgh::read_bbox(int Symmetry, char *filename) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind1, sind2, sind3; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "cgh::cgh: Can not open parameter file " << filename << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind1); + if (status == -1) + { + cout << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "cgh" && skey == "levels") + { + levels = atoi(sval.c_str()); + break; + } + } + inf.close(); + } + + grids = new int[levels]; + shape = new int **[levels]; + handle = new double **[levels]; + bbox = new double **[levels]; + PatL = new MyList *[levels]; + Lt = new double[levels]; +#if (RPB == 1) + bdsul = new MyList *[levels]; + rsul = new MyList *[levels]; +#endif + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind1, sind2, sind3; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "cgh::cgh: Can not open parameter file " << filename << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind1, sind2, sind3); + if (status == -1) + { + cout << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "cgh" && skey == "grids" && sind1 < levels) + grids[sind1] = atoi(sval.c_str()); + } 
+ inf.close(); + } + + for (int sind1 = 0; sind1 < levels; sind1++) + { + shape[sind1] = new int *[grids[sind1]]; + handle[sind1] = new double *[grids[sind1]]; + bbox[sind1] = new double *[grids[sind1]]; + for (int sind2 = 0; sind2 < grids[sind1]; sind2++) + { + shape[sind1][sind2] = new int[dim]; + handle[sind1][sind2] = new double[dim]; + bbox[sind1][sind2] = new double[2 * dim]; + } + } + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind1, sind2, sind3; + ifstream inf(filename, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "cgh::cgh: Can not open parameter file " << filename << " for inputing information of black holes" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind1, sind2, sind3); + + if (status == -1) + { + cout << "error reading parameter file " << filename << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "cgh" && sind1 < levels && sind2 < grids[sind1]) + { + if (skey == "bbox") + bbox[sind1][sind2][sind3] = atof(sval.c_str()); + else if (skey == "shape") + shape[sind1][sind2][sind3] = atoi(sval.c_str()); + } + } + inf.close(); + } +// we always assume the input parameter is in cell center style +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + for (int lev = 0; lev < levels; lev++) + for (int grd = 0; grd < grids[lev]; grd++) + { + for (int i = 0; i < dim; i++) + { + + shape[lev][grd][i] = shape[lev][grd][i] + 1; + } + } +#endif + + { + + // boxes align check + double DH0[dim]; + for (int i = 0; i < dim; i++) +#ifdef Vertex +#ifdef Cell +#error Both Cell and Vertex are defined +#endif + DH0[i] = (bbox[0][0][i + dim] - bbox[0][0][i]) / (shape[0][0][i] - 1); +#else +#ifdef Cell + DH0[i] = (bbox[0][0][i + dim] - bbox[0][0][i]) / 
shape[0][0][i]; +#else +#error Not define Vertex nor Cell +#endif +#endif + for (int lev = 0; lev < levels; lev++) + for (int grd = 0; grd < grids[lev]; grd++) + Parallel::aligncheck(bbox[0][0], bbox[lev][grd], lev, DH0, shape[lev][grd]); + +#if 0 // we do not need it here, because we do it in construct_patchlist +// extend buffer points for shell overlap +#ifdef WithShell + for(int i=0;i *cgh::construct_patchlist(int lev, int Symmetry) +{ + // Construct Patches + MyList *tmPat = 0; + // construct box list + MyList *boxes = 0, *gs; + + /* + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (myrank == 0) + { + cout << " construct patchlist: " << " level = " << lev << ", grids in this level = " << grids[lev] << endl; + } + */ + + for (int grd = 0; grd < grids[lev]; grd++) + { + if (boxes) + { + gs->next = new MyList; + gs = gs->next; + gs->data = new Parallel::gridseg; + } + else + { + boxes = gs = new MyList; + gs->data = new Parallel::gridseg; + } + for (int i = 0; i < dim; i++) + { + gs->data->llb[i] = bbox[lev][grd][i]; + gs->data->uub[i] = bbox[lev][grd][dim + i]; + gs->data->shape[i] = shape[lev][grd][i]; + } + gs->data->Bg = 0; + gs->next = 0; + } + + // Merge grid boxes (merging more than three boxes may cause bugs) + // Parallel::merge_gsl(boxes, ratio); + if (grids[lev] < 3) + { + Parallel::merge_gsl(boxes, ratio); + } + + // When grid boxes overlap, re-split the boxes + // Parallel::cut_gsl(boxes); + if (grids[lev] < 3) + { + Parallel::cut_gsl(boxes); + } + + // After splitting, add new ghost regions? 
+ // Parallel::add_ghost_touch(boxes); + if (grids[lev] < 3) + { + Parallel::add_ghost_touch(boxes); + } + + MyList *gp; + gs = boxes; + while (gs) + { + double tbb[2 * dim]; + if (tmPat) + { + gp->next = new MyList; + gp = gp->next; + for (int i = 0; i < dim; i++) + { + tbb[i] = gs->data->llb[i]; + tbb[dim + i] = gs->data->uub[i]; + } +#ifdef WithShell + gp->data = new Patch(3, gs->data->shape, tbb, lev, true, Symmetry); +#else + gp->data = new Patch(3, gs->data->shape, tbb, lev, (lev > 0), Symmetry); +#endif + } + else + { + tmPat = gp = new MyList; + for (int i = 0; i < dim; i++) + { + tbb[i] = gs->data->llb[i]; + tbb[dim + i] = gs->data->uub[i]; + } +#ifdef WithShell + gp->data = new Patch(3, gs->data->shape, tbb, lev, true, Symmetry); +#else + gp->data = new Patch(3, gs->data->shape, tbb, lev, (lev > 0), Symmetry); +#endif + } + gp->next = 0; + + gs = gs->next; + } + + boxes->destroyList(); + + return tmPat; +} + +//================================================================================================ + + +bool cgh::Interp_One_Point(MyList *VarList, + double *XX, /*input global Cartesian coordinate*/ + double *Shellf, int Symmetry) +{ + int lev = levels - 1; + while (lev >= 0) + { + MyList *Pp = PatL[lev]; + while (Pp) + { +#if (PSTR == 0) + if (Pp->data->Interp_ONE_Point(VarList, XX, Shellf, Symmetry)) + return true; +#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) + if (Pp->data->Interp_ONE_Point(VarList, XX, Shellf, Symmetry, Commlev[lev])) + return true; +#endif + Pp = Pp->next; + } + lev--; + } + return false; +} + + +void cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, bool BB, + monitor *ErrorMonitor) +{ + if (lev < movls) + return; + +#if (0) + // #if (PSTR == 1 || PSTR == 2) + MyList *Pp = PatL[lev]; + while (Pp) + { + Pp->data->checkPatch(0, start_rank[mylev]); + Pp = Pp->next; + } + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, 
&myrank); + if (myrank == start_rank[mylev]) + { + cout << "out_rank = " << myrank << endl; + for (int grd = 0; grd < grids[lev]; grd++) + { + cout << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << endl; + } + for (int bhi = 0; bhi < BH_num; bhi++) + { + cout << Porgls[lev][bhi][0] << "," << Porgls[lev][bhi][1] << "," << Porgls[lev][bhi][2] << endl; + cout << Porg0[bhi][0] << "," << Porg0[bhi][1] << "," << Porg0[bhi][2] << endl; + } + } +#endif + + // misc::tillherecheck(Commlev[lev],start_rank[lev],"start Regrid_Onelevel"); + // for moving part + bool tot_flag = false; + double **tmpPorg; + tmpPorg = new double *[BH_num]; + for (int bhi = 0; bhi < BH_num; bhi++) + { + tmpPorg[bhi] = new double[dim]; + for (int i = 0; i < dim; i++) + tmpPorg[bhi][i] = Porgls[lev][bhi][i]; + } + + for (int grd = 0; grd < grids[lev]; grd++) + { + int flag; + int do_every = 2; + double dX = PatL[lev]->data->blb->data->getdX(0); + double dY = PatL[lev]->data->blb->data->getdX(1); + double dZ = PatL[lev]->data->blb->data->getdX(2); + double rr; + // make sure that the grid corresponds to the black hole + int bhi = 0; + for (bhi = 0; bhi < BH_num; bhi++) + { + // because finner level may also change Porgbr, so we need factor 2 + // now I used Porgls + if (feq(Porgls[lev][bhi][0], handle[lev][grd][0], 2 * do_every * dX) && + feq(Porgls[lev][bhi][1], handle[lev][grd][1], 2 * do_every * dY) && + feq(Porgls[lev][bhi][2], handle[lev][grd][2], 2 * do_every * dZ)) + break; + } + if (bhi == BH_num) + { + // if the box has already touched the original point + if (feq(0, bbox[lev][grd][0], dX / 2) && + feq(0, bbox[lev][grd][1], dY / 2) && + feq(0, bbox[lev][grd][2], dZ / 2)) + break; + + if (BH_num == 1) + { + bhi = 0; + break; + } // if only one black hole, it definitely match! 
+ + if (ErrorMonitor->outfile) + { + ErrorMonitor->outfile << "cgh::Regrid: no black hole matches with grid lev#" << lev << " grd#" << grd + << " with handle (" << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << ")" << endl; + ErrorMonitor->outfile << "black holes' old positions:" << endl; + for (bhi = 0; bhi < BH_num; bhi++) + ErrorMonitor->outfile << "#" << bhi << ": (" << Porgls[lev][bhi][0] << "," << Porgls[lev][bhi][1] << "," + << Porgls[lev][bhi][2] << ")" << endl; + ErrorMonitor->outfile << "tolerance:" << endl; + ErrorMonitor->outfile << "(" << 2 * do_every * dX << "," << 2 * do_every * dY << "," << 2 * do_every * dZ << ")" << endl; + ErrorMonitor->outfile << "box lower boundary: (" << bbox[lev][grd][0] << "," << bbox[lev][grd][1] << "," << bbox[lev][grd][2] << ")" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (bhi = 0; bhi < BH_num; bhi++) + delete[] tmpPorg[bhi]; + delete[] tmpPorg; + return; + } + // x direction + rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][0] + flag * dX; + // pay attention to the symmetric case + if (Symmetry == 2 && rr < 0) + rr = -bbox[lev][grd][0]; + else + rr = flag * dX; + + if (fabs(rr) > dX / 2) + { + tot_flag = true; + bbox[lev][grd][0] = bbox[lev][grd][0] + rr; + bbox[lev][grd][3] = bbox[lev][grd][3] + rr; + handle[lev][grd][0] += rr; + tmpPorg[bhi][0] = Porg0[bhi][0]; + } + + // y direction + rr = (Porg0[bhi][1] - handle[lev][grd][1]) / dY; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][1] + flag * dY; + // pay attention to the symmetric case + if (Symmetry == 2 && rr < 0) + rr = -bbox[lev][grd][1]; + else + rr = flag * dY; + + if (fabs(rr) > dY / 2) + { + tot_flag = true; + bbox[lev][grd][1] = bbox[lev][grd][1] + rr; + bbox[lev][grd][4] = 
bbox[lev][grd][4] + rr; + handle[lev][grd][1] += rr; + tmpPorg[bhi][1] = Porg0[bhi][1]; + } + + // z direction + rr = (Porg0[bhi][2] - handle[lev][grd][2]) / dZ; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][2] + flag * dZ; + // pay attention to the symmetric case + if (Symmetry > 0 && rr < 0) + rr = -bbox[lev][grd][1]; + else + rr = flag * dZ; + + if (fabs(rr) > dZ / 2) + { + tot_flag = true; + bbox[lev][grd][2] = bbox[lev][grd][2] + rr; + bbox[lev][grd][5] = bbox[lev][grd][5] + rr; + handle[lev][grd][2] += rr; + tmpPorg[bhi][2] = Porg0[bhi][2]; + } + } + + // misc::tillherecheck(Commlev[lev],start_rank[lev],"after tot_flag check"); + + if (tot_flag) + { + int nprocs; + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // misc::tillherecheck(Commlev[lev],start_rank[lev],"before recompose_cgh_Onelevel"); + + recompose_cgh_Onelevel(nprocs, lev, OldList, StateList, FutureList, tmList, Symmetry, BB); + + // misc::tillherecheck(Commlev[lev],start_rank[lev],"after recompose_cgh_Onelevel"); + + for (int bhi = 0; bhi < BH_num; bhi++) + { + for (int i = 0; i < dim; i++) + Porgls[lev][bhi][i] = tmpPorg[bhi][i]; + } + +#if (PSTR == 1 || PSTR == 2) +// MyList *Pp=PatL[lev]; +// while(Pp) +// { +// Pp->data->checkPatch(0,start_rank[mylev]); +// Pp=Pp->next; +// } +#endif + } + + for (int bhi = 0; bhi < BH_num; bhi++) + delete[] tmpPorg[bhi]; + delete[] tmpPorg; +} + + +#if (PSTR == 0) +void cgh::recompose_cgh_Onelevel(int nprocs, int lev, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB) +{ + MyList *tmPat = 0; + tmPat = construct_patchlist(lev, Symmetry); + // tmPat construction completes + Parallel::distribute(tmPat, nprocs, ingfs, fngfs, false); + // checkPatchList(tmPat,true); + bool CC = (lev > trfls); + Parallel::fill_level_data(tmPat, PatL[lev], PatL[lev - 1], OldList, StateList, FutureList, tmList, Symmetry, BB, CC); + + 
Parallel::KillBlocks(PatL[lev]); + PatL[lev]->destroyList(); + PatL[lev] = tmPat; +} +#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) +#warning "recompose_cgh_Onelevel is not implimented yet" +void cgh::recompose_cgh_Onelevel(int nprocs, int lev, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB) +{ + MyList *tmPat = 0; + misc::tillherecheck(Commlev[lev], start_rank[lev], "before construct_patchlist"); + tmPat = construct_patchlist(lev, Symmetry); + misc::tillherecheck(Commlev[lev], start_rank[lev], "after construct_patchlist"); + // tmPat construction completes + Parallel::distribute(tmPat, end_rank[lev] - start_rank[lev] + 1, ingfs, fngfs, false, start_rank[lev], end_rank[lev]); + misc::tillherecheck(Commlev[lev], start_rank[lev], "after distribute"); + // checkPatchList(tmPat,true); + bool CC = (lev > trfls); + Parallel::fill_level_data(tmPat, PatL[lev], PatL[lev - 1], OldList, StateList, FutureList, tmList, Symmetry, BB, CC); + misc::tillherecheck(Commlev[lev], start_rank[lev], "after fill_level_data"); + + Parallel::KillBlocks(PatL[lev]); + PatL[lev]->destroyList(); + PatL[lev] = tmPat; +} + + +// the input lev is lower level for regrid +void cgh::Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, bool BB, + monitor *ErrorMonitor) +{ + lev++; + if (lev < movls) + return; + + // for moving part + bool tot_flag = false; + double **tmpPorg; + tmpPorg = new double *[BH_num]; + for (int bhi = 0; bhi < BH_num; bhi++) + { + tmpPorg[bhi] = new double[dim]; + for (int i = 0; i < dim; i++) + tmpPorg[bhi][i] = Porgbr[bhi][i]; + } + + for (int grd = 0; grd < grids[lev]; grd++) + { + int flag; + int do_every = 2; + double dX = PatL[lev]->data->blb->data->getdX(0); + double dY = PatL[lev]->data->blb->data->getdX(1); + double dZ = PatL[lev]->data->blb->data->getdX(2); + double rr; + // make sure that the grid 
corresponds to the black hole + int bhi = 0; + for (bhi = 0; bhi < BH_num; bhi++) + { + // because finner level may also change Porgbr, so we need factor 2 + if (feq(Porgbr[bhi][0], handle[lev][grd][0], 2 * do_every * dX) && + feq(Porgbr[bhi][1], handle[lev][grd][1], 2 * do_every * dY) && + feq(Porgbr[bhi][2], handle[lev][grd][2], 2 * do_every * dZ)) + break; + } + if (bhi == BH_num) + { + // if the box has already touched the original point + if (feq(0, bbox[lev][grd][0], dX / 2) && + feq(0, bbox[lev][grd][1], dY / 2) && + feq(0, bbox[lev][grd][2], dZ / 2)) + break; + + if (BH_num == 1) + { + bhi = 0; + break; + } // if only one black hole, it definitely match! + + if (ErrorMonitor->outfile) + { + ErrorMonitor->outfile << "cgh::Regrid: no black hole matches with grid lev#" << lev << " grd#" << grd + << " with handle (" << handle[lev][grd][0] << "," << handle[lev][grd][1] << "," << handle[lev][grd][2] << ")" << endl; + ErrorMonitor->outfile << "black holes' old positions:" << endl; + for (bhi = 0; bhi < BH_num; bhi++) + ErrorMonitor->outfile << "#" << bhi << ": (" << Porgbr[bhi][0] << "," << Porgbr[bhi][1] << "," << Porgbr[bhi][2] << ")" << endl; + ErrorMonitor->outfile << "tolerance:" << endl; + ErrorMonitor->outfile << "(" << 2 * do_every * dX << "," << 2 * do_every * dY << "," << 2 * do_every * dZ << ")" << endl; + ErrorMonitor->outfile << "box lower boundary: (" << bbox[lev][grd][0] << "," << bbox[lev][grd][1] << "," << bbox[lev][grd][2] << ")" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (bhi = 0; bhi < BH_num; bhi++) + delete[] tmpPorg[bhi]; + delete[] tmpPorg; + return; + } + // x direction + rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][0] + flag * dX; + // pay attention to the symmetric case + if (Symmetry == 2 && rr < 0) + rr = -bbox[lev][grd][0]; + else + rr = flag * dX; + + if (fabs(rr) > dX / 2) + { + 
tot_flag = true; + bbox[lev][grd][0] = bbox[lev][grd][0] + rr; + bbox[lev][grd][3] = bbox[lev][grd][3] + rr; + handle[lev][grd][0] += rr; + tmpPorg[bhi][0] = Porg0[bhi][0]; + } + + // y direction + rr = (Porg0[bhi][1] - handle[lev][grd][1]) / dY; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][1] + flag * dY; + // pay attention to the symmetric case + if (Symmetry == 2 && rr < 0) + rr = -bbox[lev][grd][1]; + else + rr = flag * dY; + + if (fabs(rr) > dY / 2) + { + tot_flag = true; + bbox[lev][grd][1] = bbox[lev][grd][1] + rr; + bbox[lev][grd][4] = bbox[lev][grd][4] + rr; + handle[lev][grd][1] += rr; + tmpPorg[bhi][1] = Porg0[bhi][1]; + } + + // z direction + rr = (Porg0[bhi][2] - handle[lev][grd][2]) / dZ; + if (rr > 0) + flag = int(rr + 0.5) / do_every; + else + flag = int(rr - 0.5) / do_every; + flag = flag * do_every; + rr = bbox[lev][grd][2] + flag * dZ; + // pay attention to the symmetric case + if (Symmetry > 0 && rr < 0) + rr = -bbox[lev][grd][1]; + else + rr = flag * dZ; + + if (fabs(rr) > dZ / 2) + { + tot_flag = true; + bbox[lev][grd][2] = bbox[lev][grd][2] + rr; + bbox[lev][grd][5] = bbox[lev][grd][5] + rr; + handle[lev][grd][2] += rr; + tmpPorg[bhi][2] = Porg0[bhi][2]; + } + } + + if (tot_flag) + { + int nprocs; + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + recompose_cgh_Onelevel(nprocs, lev, OldList, StateList, FutureList, tmList, Symmetry, BB); + } + + for (int bhi = 0; bhi < BH_num; bhi++) + delete[] tmpPorg[bhi]; + delete[] tmpPorg; +} +#endif + + +void cgh::settrfls(const int lev) +{ + trfls = lev; +} + +//================================================================================================ +// Load Balancing Functions +//================================================================================================ + +// Initialize load balancing +void cgh::init_load_balance(int nprocs) +{ + if (rank_interp_times) + delete[] rank_interp_times; + if 
(heavy_ranks) + delete[] heavy_ranks; + + rank_interp_times = new double[nprocs]; + heavy_ranks = new int[4]; // Maximum 4 heavy ranks + num_heavy_ranks = 0; + + for (int i = 0; i < nprocs; i++) + rank_interp_times[i] = 0.0; +} + +// Update interpolation time for a rank +void cgh::update_interp_time(int rank, double time) +{ + if (rank_interp_times && rank >= 0) + { + rank_interp_times[rank] = time; + } +} + +// Check and perform load balancing if needed +bool cgh::check_and_rebalance(int nprocs, int lev, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB) +{ + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + // Only check at specified intervals + current_time_step++; + if (current_time_step % load_balance_check_interval != 0) + return false; + + if (myrank == 0) + { + cout << "\n=== Checking load balance at time step " << current_time_step << " ===" << endl; + } + + // Collect all rank times on rank 0 + double *all_times = nullptr; + if (myrank == 0) + { + all_times = new double[nprocs]; + } + + MPI_Gather(rank_interp_times, 1, MPI_DOUBLE, all_times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + bool need_rebalance = false; + + if (myrank == 0) + { + // Check if load balancing is needed + need_rebalance = Parallel::check_load_balance_need(all_times, nprocs, num_heavy_ranks, heavy_ranks); + + if (need_rebalance) + { + cout << "=== Load imbalance detected! 
Need to rebalance ===" << endl; + cout << "Top " << num_heavy_ranks << " heavy ranks: "; + for (int i = 0; i < num_heavy_ranks; i++) + { + cout << heavy_ranks[i] << " (" << all_times[heavy_ranks[i]] << " s) "; + } + cout << endl; + + // Analyze blocks that need to be split + Parallel::split_heavy_blocks(PatL[lev], heavy_ranks, num_heavy_ranks, 2, nprocs, ingfs, fngfs); + + // Set lev_flag to trigger recompose_cgh + cout << "=== Triggering recompose_cgh for level " << lev << " ===" << endl; + } + else + { + cout << "=== Load is balanced, no rebalancing needed ===" << endl; + } + + delete[] all_times; + } + + // Broadcast the decision to all ranks + MPI_Bcast(&need_rebalance, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + + if (need_rebalance) + { + // Broadcast heavy ranks information + MPI_Bcast(&num_heavy_ranks, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(heavy_ranks, num_heavy_ranks, MPI_INT, 0, MPI_COMM_WORLD); + + // Perform recompose_cgh on the specified level + if (myrank == 0) + { + cout << "=== Performing recompose_cgh ===" << endl; + } + + // Call recompose_cgh_Onelevel for the specified level + bool *lev_flag = new bool[1]; + lev_flag[0] = true; + recompose_cgh_Onelevel(nprocs, lev, OldList, StateList, FutureList, tmList, Symmetry, BB); + delete[] lev_flag; + + // Reset time counter after rebalancing + current_time_step = 0; + + return true; + } + + return false; +} diff --git a/AMSS_NCKU_source/cgh.h b/AMSS_NCKU_source/cgh.h index 79e7bf6..0402481 100644 --- a/AMSS_NCKU_source/cgh.h +++ b/AMSS_NCKU_source/cgh.h @@ -1,92 +1,107 @@ - -#ifndef CGH_H -#define CGH_H - -#include -#include "MyList.h" -#include "MPatch.h" -#include "macrodef.h" -#include "monitor.h" -#include "Parallel.h" - -class cgh -{ - -public: - int levels, movls, BH_num_in; - // information of boxes - int *grids; - double ***bbox; - int ***shape; - double ***handle; - double ***Porgls; - double *Lt; - - // information of Patch list - MyList **PatL; - -// information of OutBdLow2Hi point list and 
Restrict point list -#if (RPB == 1) - MyList **bdsul, **rsul; -#endif - -#if (PSTR == 1 || PSTR == 2 || PSTR == 3) - int mylev; - int *start_rank, *end_rank; - MPI_Comm *Commlev; -#endif - -protected: - int ingfs, fngfs; - static constexpr double ratio = 0.75; - int trfls; - -public: - cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, monitor *ErrorMonitor); - - ~cgh(); - - void compose_cgh(int nprocs); - void sethandle(monitor *ErrorMonitor); - void checkPatchList(MyList *PatL, bool buflog); - void Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, bool BB, - monitor *ErrorMonitor); - void Regrid_fake(int Symmetry, int BH_num, double **Porgbr, double **Porg0, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, bool BB, - monitor *ErrorMonitor); - void recompose_cgh(int nprocs, bool *lev_flag, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, - int Symmetry, bool BB); - void recompose_cgh_fake(int nprocs, bool *lev_flag, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, - int Symmetry, bool BB); - void read_bbox(int Symmetry, char *filename); - MyList *construct_patchlist(int lev, int Symmetry); - bool Interp_One_Point(MyList *VarList, - double *XX, /*input global Cartesian coordinate*/ - double *Shellf, int Symmetry); - void recompose_cgh_Onelevel(int nprocs, int lev, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, - int Symmetry, bool BB); - void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, bool BB, - monitor *ErrorMonitor); - void Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, - MyList *OldList, MyList *StateList, - MyList *FutureList, MyList *tmList, bool BB, - monitor *ErrorMonitor); - void 
settrfls(const int lev); - -#if (PSTR == 1 || PSTR == 2 || PSTR == 3) - void construct_mylev(int nprocs); -#endif -}; - -#endif /* CGH_H */ + +#ifndef CGH_H +#define CGH_H + +#include +#include "MyList.h" +#include "MPatch.h" +#include "macrodef.h" +#include "monitor.h" +#include "Parallel.h" + +class cgh +{ + +public: + int levels, movls, BH_num_in; + // information of boxes + int *grids; + double ***bbox; + int ***shape; + double ***handle; + double ***Porgls; + double *Lt; + + // information of Patch list + MyList **PatL; + +// information of OutBdLow2Hi point list and Restrict point list +#if (RPB == 1) + MyList **bdsul, **rsul; +#endif + +#if (PSTR == 1 || PSTR == 2 || PSTR == 3) + int mylev; + int *start_rank, *end_rank; + MPI_Comm *Commlev; +#endif + +protected: + int ingfs, fngfs; + static constexpr double ratio = 0.75; + int trfls; + +public: + cgh(int ingfsi, int fngfsi, int Symmetry, char *filename, int checkrun, monitor *ErrorMonitor); + + ~cgh(); + + void compose_cgh(int nprocs); + void sethandle(monitor *ErrorMonitor); + void checkPatchList(MyList *PatL, bool buflog); + void Regrid(int Symmetry, int BH_num, double **Porgbr, double **Porg0, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, bool BB, + monitor *ErrorMonitor); + void Regrid_fake(int Symmetry, int BH_num, double **Porgbr, double **Porg0, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, bool BB, + monitor *ErrorMonitor); + void recompose_cgh(int nprocs, bool *lev_flag, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB); + void recompose_cgh_fake(int nprocs, bool *lev_flag, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB); + void read_bbox(int Symmetry, char *filename); + MyList *construct_patchlist(int lev, int Symmetry); + bool Interp_One_Point(MyList *VarList, + double *XX, /*input global Cartesian coordinate*/ + double *Shellf, int 
Symmetry); + void recompose_cgh_Onelevel(int nprocs, int lev, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB); + void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, bool BB, + monitor *ErrorMonitor); + void Regrid_Onelevel_aux(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, bool BB, + monitor *ErrorMonitor); + void settrfls(const int lev); + +#if (PSTR == 1 || PSTR == 2 || PSTR == 3) + void construct_mylev(int nprocs); +#endif + + // Load balancing support + bool enable_load_balance; // Enable load balancing + int load_balance_check_interval; // Check interval (in time steps) + int current_time_step; // Current time step counter + double *rank_interp_times; // Store interpolation times for each rank + int *heavy_ranks; // Store heavy rank numbers + int num_heavy_ranks; // Number of heavy ranks + + void init_load_balance(int nprocs); + void update_interp_time(int rank, double time); + bool check_and_rebalance(int nprocs, int lev, + MyList *OldList, MyList *StateList, + MyList *FutureList, MyList *tmList, + int Symmetry, bool BB); +}; + +#endif /* CGH_H */ diff --git a/AMSS_NCKU_source/surface_integral.C b/AMSS_NCKU_source/surface_integral.C index c2b7b67..44edce3 100644 --- a/AMSS_NCKU_source/surface_integral.C +++ b/AMSS_NCKU_source/surface_integral.C @@ -1,3751 +1,3750 @@ - -//---------------------------------------------------------------- -// Using Gauss-Legendre quadrature in theta direction -// and trapezoidal rule in phi direction (from Second Euler-Maclaurin summation formula, we can see that -// this method gives expolential convergence for periodic function) -//---------------------------------------------------------------- -#ifdef newc -#include -#include -#include -#include -#include -#include 
-using namespace std; -#else -#include -#include -#include -#include -#include -#include -#endif -#include - -#include "misc.h" -#include "cgh.h" -#include "Parallel.h" -#include "surface_integral.h" -#include "fadmquantites_bssn.h" -#include "getnpem2.h" -#include "getnp4.h" -#include "parameters.h" - -#define PI M_PI -//|============================================================================ -//| Constructor -//|============================================================================ - -surface_integral::surface_integral(int iSymmetry) : Symmetry(iSymmetry) -{ - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - MPI_Comm_size(MPI_COMM_WORLD, &cpusize); - int N = 40; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - char pname[50]; - { - map::iterator iter = parameters::str_par.find("inputpar"); - if (iter != parameters::str_par.end()) - { - strcpy(pname, (iter->second).c_str()); - } - else - { - cout << "Error inputpar" << endl; - exit(0); - } - } - ifstream inf(pname, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "Can not open parameter file " << pname << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - cout << "error reading parameter file " << pname << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "SurfaceIntegral") - { - if (skey == "number of points for quarter sphere") - N = atoi(sval.c_str()); - } - } - inf.close(); - } - //|-----number of points for whole [0,pi] x [0,2pi] - N_phi = 4 * N; // for simplicity, we require this number must be 4*N - N_theta = 2 * N; // 2*N - - if (myrank == 0) - { - cout << "-----------------------------------------------------------------------" << endl; -#ifdef GaussInt - cout << " spherical integration for 
wave form extraction with Gauss method " << endl; -#else - cout << " spherical integration for wave form extraction with mid point method " << endl; -#endif - cout << " N_phi = " << N_phi << endl; - cout << " N_theta = " << N_theta << endl; - cout << "-----------------------------------------------------------------------" << endl; - } - -#ifdef GaussInt - // weight function cover all of [0,pi] - arcostheta = new double[N_theta]; - wtcostheta = new double[N_theta]; - - // note: theta in [0,pi/2], upper half sphere, corresponds to 1 < costheta < 0 - misc::gaulegf(-1.0, 1.0, arcostheta, wtcostheta, N_theta); - // due to symmetry, I need first half array corresponds to upper sphere, note these two arrays must match each other - misc::inversearray(arcostheta, N_theta); - misc::inversearray(wtcostheta, N_theta); -#endif - - if (Symmetry == 2) - { - N_phi = N_phi / 4; - N_theta = N_theta / 2; - dphi = PI / (2.0 * N_phi); - dcostheta = 1.0 / N_theta; - factor = 8; - } - else if (Symmetry == 1) - { - N_theta = N_theta / 2; - dphi = 2.0 * PI / N_phi; - dcostheta = 1.0 / N_theta; - factor = 2; - } - else if (Symmetry == 0) - { - dphi = 2.0 * PI / N_phi; - dcostheta = 2.0 / N_theta; - factor = 1; - } - else if (myrank == 0) - { - cout << "surface_integral::surface_integral: not supported Symmetry setting!" 
<< endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - -#ifndef GaussInt - // weight function cover all of [0,pi] - arcostheta = new double[N_theta]; -#endif - n_tot = N_theta * N_phi; - nx_g = new double[n_tot]; - ny_g = new double[n_tot]; - nz_g = new double[n_tot]; - - int n = 0; - double costheta, sintheta, ph; - - for (int i = 0; i < N_theta; ++i) - { -#ifndef GaussInt - arcostheta[i] = 1.0 - (i + 0.5) * dcostheta; -#endif - costheta = arcostheta[i]; - sintheta = sqrt(1.0 - costheta * costheta); - - for (int j = 0; j < N_phi; ++j) - { - ph = (j + 0.5) * dphi; - // normal vector respect to the constant R sphere - nx_g[n] = sintheta * cos(ph); - ny_g[n] = sintheta * sin(ph); - nz_g[n] = costheta; - n++; - } - } -} - -//|============================================================================ -//| Destructor -//|============================================================================ -surface_integral::~surface_integral() -{ - delete[] nx_g; - delete[] ny_g; - delete[] nz_g; - delete[] arcostheta; -#ifdef GaussInt - delete[] wtcostheta; -#endif -} -//|---------------------------------------------------------------- -// spin weighted spinw component of psi4, general routine -// l takes from spinw to maxl; m takes from -l to l -//|---------------------------------------------------------------- -void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4, - int spinw, int maxl, int NN, double *RP, double *IP, - monitor *Monitor) // NN is the length of RP and IP -{ - if (myrank == 0 && GH->grids[lev] != 1) - if (Monitor->outfile) - Monitor->outfile << "WARNING: surface integral on multipatches" << endl; - else - cout << "WARNING: surface integral on multipatches" << endl; - - const int InList = 2; - - MyList *DG_List = new MyList(Rpsi4); - DG_List->insert(Ipsi4); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; 
- pox[2][n] = rex * nz_g[n]; - } - - int mp, Lp, Nmin, Nmax; - mp = n_tot / cpusize; - Lp = n_tot - cpusize * mp; - if (Lp > myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - Nmin = myrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax); - - //|~~~~~> Integrate the dot product of Dphi with the surface normal. - - double *RP_out, *IP_out; - RP_out = new double[NN]; - IP_out = new double[NN]; - - for (int ii = 0; ii < NN; ii++) - { - RP_out[ii] = 0; - IP_out[ii] = 0; - } - // theta part - double costheta, thetap; - double cosmphi, sinmphi; - - int i, j; - int lpsy = 0; - if (Symmetry == 0) - lpsy = 1; - else if (Symmetry == 1) - lpsy = 2; - else if (Symmetry == 2) - lpsy = 8; - - double psi4RR, psi4II; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - int countlm = 0; - for (int pl = spinw; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - for (int lp = 0; lp < lpsy; lp++) - { - switch (lp) - { - case 0: //+++ (theta, phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = shellf[InList * n + 1]; - break; - case 1: //++- (pi-theta, phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = Rpsi4->SoA[2] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * shellf[InList * n + 1]; - break; - case 2: //+-+ (theta, 2*pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = Rpsi4->SoA[1] * shellf[InList * n]; - psi4II = Ipsi4->SoA[1] * shellf[InList * n + 1]; - break; - case 3: //+-- (pi-theta, 2*pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * 
dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * shellf[InList * n + 1]; - break; - case 4: //-++ (theta, pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[0] * shellf[InList * n + 1]; - break; - case 5: //-+- (pi-theta, pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[0] * shellf[InList * n + 1]; - break; - case 6: //--+ (theta, pi+phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; - break; - case 7: //--- (pi-theta, pi+phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; - } - - thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 -#ifdef GaussInt - // wtcostheta is even function respect costheta - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; -#else - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi); -#endif - } - countlm++; // no sanity check for countlm 
and NN which should be noted in the input parameters - } - } - - for (int ii = 0; ii < NN; ii++) - { -#ifdef GaussInt - RP_out[ii] = RP_out[ii] * rex * dphi; - IP_out[ii] = IP_out[ii] * rex * dphi; -#else - RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; - IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; -#endif - } - //|------+ Communicate and sum the results from each processor. - - { - double *RPIP_out = new double[2 * NN]; - double *RPIP = new double[2 * NN]; - memcpy(RPIP_out, RP_out, NN * sizeof(double)); - memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); - MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - memcpy(RP, RPIP, NN * sizeof(double)); - memcpy(IP, RPIP + NN, NN * sizeof(double)); - delete[] RPIP_out; - delete[] RPIP; - } - - //|------= Free memory. - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - delete[] RP_out; - delete[] IP_out; - DG_List->clearList(); -} -void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4, - int spinw, int maxl, int NN, double *RP, double *IP, - monitor *Monitor, MPI_Comm Comm_here) // NN is the length of RP and IP -{ - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start surface_integral::surf_Wave"); - - int lmyrank; - MPI_Comm_rank(Comm_here, &lmyrank); - if (lmyrank == 0 && GH->grids[lev] != 1) - if (Monitor->outfile) - Monitor->outfile << "WARNING: surface integral on multipatches" << endl; - else - cout << "WARNING: surface integral on multipatches" << endl; - - const int InList = 2; - - MyList *DG_List = new MyList(Rpsi4); - DG_List->insert(Ipsi4); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Interp_Points"); - - 
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Comm_here); - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Interp_Points"); - - int mp, Lp, Nmin, Nmax; - - int cpusize_here; - MPI_Comm_size(Comm_here, &cpusize_here); - - mp = n_tot / cpusize_here; - Lp = n_tot - cpusize_here * mp; - - if (Lp > lmyrank) - { - Nmin = lmyrank * mp + lmyrank; - Nmax = Nmin + mp; - } - else - { - Nmin = lmyrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - //|~~~~~> Integrate the dot product of Dphi with the surface normal. - - double *RP_out, *IP_out; - RP_out = new double[NN]; - IP_out = new double[NN]; - - for (int ii = 0; ii < NN; ii++) - { - RP_out[ii] = 0; - IP_out[ii] = 0; - } - // theta part - double costheta, thetap; - double cosmphi, sinmphi; - - int i, j; - int lpsy = 0; - if (Symmetry == 0) - lpsy = 1; - else if (Symmetry == 1) - lpsy = 2; - else if (Symmetry == 2) - lpsy = 8; - - double psi4RR, psi4II; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - int countlm = 0; - for (int pl = spinw; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - for (int lp = 0; lp < lpsy; lp++) - { - switch (lp) - { - case 0: //+++ (theta, phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = shellf[InList * n + 1]; - break; - case 1: //++- (pi-theta, phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = Rpsi4->SoA[2] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * shellf[InList * n + 1]; - break; - case 2: //+-+ (theta, 2*pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = Rpsi4->SoA[1] * shellf[InList * n]; - psi4II = Ipsi4->SoA[1] * shellf[InList * n + 1]; - break; - case 3: //+-- 
(pi-theta, 2*pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * shellf[InList * n + 1]; - break; - case 4: //-++ (theta, pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[0] * shellf[InList * n + 1]; - break; - case 5: //-+- (pi-theta, pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[0] * shellf[InList * n + 1]; - break; - case 6: //--+ (theta, pi+phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; - break; - case 7: //--- (pi-theta, pi+phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; - } - - thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 -#ifdef GaussInt - // wtcostheta is even function respect costheta - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; -#else - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * 
cosmphi - psi4RR * sinmphi); -#endif - } - countlm++; // no sanity check for countlm and NN which should be noted in the input parameters - } - } - - for (int ii = 0; ii < NN; ii++) - { -#ifdef GaussInt - RP_out[ii] = RP_out[ii] * rex * dphi; - IP_out[ii] = IP_out[ii] * rex * dphi; -#else - RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; - IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; -#endif - } - //|------+ Communicate and sum the results from each processor. - - { - double *RPIP_out = new double[2 * NN]; - double *RPIP = new double[2 * NN]; - memcpy(RPIP_out, RP_out, NN * sizeof(double)); - memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); - MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, Comm_here); - memcpy(RP, RPIP, NN * sizeof(double)); - memcpy(IP, RPIP + NN, NN * sizeof(double)); - delete[] RPIP_out; - delete[] RPIP; - } - - //|------= Free memory. - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - delete[] RP_out; - delete[] IP_out; - DG_List->clearList(); -} -//|---------------------------------------------------------------- -// for shell patch -//|---------------------------------------------------------------- -void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, var *Rpsi4, var *Ipsi4, - int spinw, int maxl, int NN, double *RP, double *IP, - monitor *Monitor) // NN is the length of RP and IP -{ - const int InList = 2; - - MyList *DG_List = new MyList(Rpsi4); - DG_List->insert(Ipsi4); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); - - int mp, Lp, Nmin, Nmax; - - mp = n_tot / cpusize; - Lp = n_tot - cpusize * mp; - - if (Lp > myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - 
Nmin = myrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - //|~~~~~> Integrate the dot product of Dphi with the surface normal. - - double *RP_out, *IP_out; - RP_out = new double[NN]; - IP_out = new double[NN]; - - for (int ii = 0; ii < NN; ii++) - { - RP_out[ii] = 0; - IP_out[ii] = 0; - } - // theta part - double costheta, thetap; - double cosmphi, sinmphi; - - int i, j; - int lpsy = 0; - if (Symmetry == 0) - lpsy = 1; - else if (Symmetry == 1) - lpsy = 2; - else if (Symmetry == 2) - lpsy = 8; - - double psi4RR, psi4II; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - int countlm = 0; - for (int pl = spinw; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - for (int lp = 0; lp < lpsy; lp++) - { - switch (lp) - { - case 0: //+++ (theta, phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = shellf[InList * n + 1]; - break; - case 1: //++- (pi-theta, phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = Rpsi4->SoA[2] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * shellf[InList * n + 1]; - break; - case 2: //+-+ (theta, 2*pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = Rpsi4->SoA[1] * shellf[InList * n]; - psi4II = Ipsi4->SoA[1] * shellf[InList * n + 1]; - break; - case 3: //+-- (pi-theta, 2*pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * shellf[InList * n + 1]; - break; - case 4: //-++ (theta, pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = 
Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[0] * shellf[InList * n + 1]; - break; - case 5: //-+- (pi-theta, pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[0] * shellf[InList * n + 1]; - break; - case 6: //--+ (theta, pi+phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; - break; - case 7: //--- (pi-theta, pi+phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; - psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; - } - - thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 -#ifdef GaussInt - // wtcostheta is even function respect costheta - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; -#else - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi); -#endif - } - countlm++; // no sanity check for countlm and NN which should be noted in the input parameters - } - } - - for (int ii = 0; ii < NN; ii++) - { -#ifdef GaussInt - RP_out[ii] = RP_out[ii] * rex * dphi; - IP_out[ii] = IP_out[ii] * rex * dphi; -#else - RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; - IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; -#endif - } - //|------+ Communicate and 
sum the results from each processor. - - { - double *RPIP_out = new double[2 * NN]; - double *RPIP = new double[2 * NN]; - memcpy(RPIP_out, RP_out, NN * sizeof(double)); - memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); - MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - memcpy(RP, RPIP, NN * sizeof(double)); - memcpy(IP, RPIP + NN, NN * sizeof(double)); - delete[] RPIP_out; - delete[] RPIP; - } - - //|------= Free memory. - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - delete[] RP_out; - delete[] IP_out; - DG_List->clearList(); -} -//|---------------------------------------------------------------- -// for shell patch -// for EM wave specially symmetric case -//|---------------------------------------------------------------- -void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, - var *Ex, var *Ey, var *Ez, var *Bx, var *By, var *Bz, - var *chi, var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, - int spinw, int maxl, int NN, double *RP, double *IP, - monitor *Monitor) // NN is the length of RP and IP -{ - const int InList = 13; - - MyList *DG_List = new MyList(Ex); - DG_List->insert(Ey); - DG_List->insert(Ez); - DG_List->insert(Bx); - DG_List->insert(By); - DG_List->insert(Bz); - DG_List->insert(chi); - DG_List->insert(gxx); - DG_List->insert(gxy); - DG_List->insert(gxz); - DG_List->insert(gyy); - DG_List->insert(gyz); - DG_List->insert(gzz); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); - - int mp, Lp, Nmin, Nmax; - - mp = n_tot / cpusize; - Lp = n_tot - cpusize * mp; - - if (Lp > myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - Nmin = myrank * mp + Lp; - Nmax 
= Nmin + mp - 1; - } - - //|~~~~~> Integrate the dot product of Dphi with the surface normal. - - double *RP_out, *IP_out; - RP_out = new double[NN]; - IP_out = new double[NN]; - - for (int ii = 0; ii < NN; ii++) - { - RP_out[ii] = 0; - IP_out[ii] = 0; - } - // theta part - double costheta, thetap; - double cosmphi, sinmphi; - - int i, j; - int lpsy = 0; - if (Symmetry == 0) - lpsy = 1; - else if (Symmetry == 1) - lpsy = 2; - else if (Symmetry == 2) - lpsy = 8; - - double psi4RR, psi4II; - double px, py, pz; - double pEx, pEy, pEz, pBx, pBy, pBz; - double pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - int countlm = 0; - for (int pl = spinw; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - for (int lp = 0; lp < lpsy; lp++) - { - px = pox[0][n]; - py = pox[1][n]; - pz = pox[2][n]; - pEx = shellf[InList * n]; - pEy = shellf[InList * n + 1]; - pEz = shellf[InList * n + 2]; - pBx = shellf[InList * n + 3]; - pBy = shellf[InList * n + 4]; - pBz = shellf[InList * n + 5]; - pchi = shellf[InList * n + 6]; - pgxx = shellf[InList * n + 7]; - pgxy = shellf[InList * n + 8]; - pgxz = shellf[InList * n + 9]; - pgyy = shellf[InList * n + 10]; - pgyz = shellf[InList * n + 11]; - pgzz = shellf[InList * n + 12]; - switch (lp) - { - case 0: //+++ (theta, phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - break; - case 1: //++- (pi-theta, phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - pz = -pz; - pEz = -pEz; - pBx = -pBx; - pBy = -pBy; - pgxz = -pgxz; - pgyz = -pgyz; - break; - case 2: //+-+ (theta, 2*pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - py = -py; - pEy = -pEy; - pBx = -pBx; - pBz = -pBz; - pgxy = -pgxy; - pgyz = 
-pgyz; - break; - case 3: //+-- (pi-theta, 2*pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - py = -py; - pz = -pz; - pEz = -pEz; - pBz = -pBz; - pgxz = -pgxz; - pEy = -pEy; - pBy = -pBy; - pgxy = -pgxy; - break; - case 4: //-++ (theta, pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - px = -px; - pEx = -pEx; - pBy = -pBy; - pBz = -pBz; - pgxy = -pgxy; - pgxz = -pgxz; - break; - case 5: //-+- (pi-theta, pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - pz = -pz; - px = -px; - pEz = -pEz; - pBz = -pBz; - pgyz = -pgyz; - pEx = -pEx; - pBx = -pBx; - pgxy = -pgxy; - break; - case 6: //--+ (theta, pi+phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - px = -px; - py = -py; - pEx = -pEx; - pBx = -pBx; - pgxz = -pgxz; - pEy = -pEy; - pBy = -pBy; - pgyz = -pgyz; - break; - case 7: //--- (pi-theta, pi+phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - px = -px; - py = -py; - pz = -pz; - pEx = -pEx; - pEy = -pEy; - pEz = -pEz; - } - - f_getnpem2_point(px, py, pz, pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz, pEx, pEy, pEz, pBx, pBy, pBz, - psi4RR, psi4II); - thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 - - // find back the one - pchi = pchi + 1; -#ifdef GaussInt - // wtcostheta is even function respect costheta - RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; - IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; -#else - RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR 
* cosmphi + psi4II * sinmphi); - IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi); -#endif - } - countlm++; // no sanity check for countlm and NN which should be noted in the input parameters - } - } - - for (int ii = 0; ii < NN; ii++) - { -#ifdef GaussInt - RP_out[ii] = RP_out[ii] * rex * dphi; - IP_out[ii] = IP_out[ii] * rex * dphi; -#else - RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; - IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; -#endif - } - //|------+ Communicate and sum the results from each processor. - - { - double *RPIP_out = new double[2 * NN]; - double *RPIP = new double[2 * NN]; - memcpy(RPIP_out, RP_out, NN * sizeof(double)); - memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); - MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - memcpy(RP, RPIP, NN * sizeof(double)); - memcpy(IP, RPIP + NN, NN * sizeof(double)); - delete[] RPIP_out; - delete[] RPIP; - } - - //|------= Free memory. - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - delete[] RP_out; - delete[] IP_out; - DG_List->clearList(); -} -//|---------------------------------------------------------------- -// for shell patch -// for EM wave specially symmetric case -// unify for phi1 and phi2 -//|---------------------------------------------------------------- -void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, - var *Ex, var *Ey, var *Ez, var *Bx, var *By, var *Bz, - var *chi, var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, - int spinw, int maxl, int NN, double *RP, double *IP, - monitor *Monitor, - void (*funcs)(double &, double &, double &, - double &, double &, double &, double &, double &, double &, double &, - double &, double &, double &, double &, double &, double &, - double &, double &)) // NN is the length of RP and IP -{ - const int InList = 13; - - MyList *DG_List = new MyList(Ex); - DG_List->insert(Ey); - DG_List->insert(Ez); - 
DG_List->insert(Bx); - DG_List->insert(By); - DG_List->insert(Bz); - DG_List->insert(chi); - DG_List->insert(gxx); - DG_List->insert(gxy); - DG_List->insert(gxz); - DG_List->insert(gyy); - DG_List->insert(gyz); - DG_List->insert(gzz); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); - - double *RP_out, *IP_out; - RP_out = new double[NN]; - IP_out = new double[NN]; - - for (int ii = 0; ii < NN; ii++) - { - RP_out[ii] = 0; - IP_out[ii] = 0; - } - -#if 0 -// for debug - if(myrank==0) - { - double costheta, thetap; - double cosmphi,sinmphi; - - int i,j; - int lpsy=0; - if( Symmetry == 0 ) lpsy=1; - else if( Symmetry == 1 ) lpsy=2; - else if( Symmetry == 2 ) lpsy=8; - - double psi4RR,psi4II; - double px,py,pz; - double pEx,pEy,pEz,pBx,pBy,pBz; - double pchi,pgxx,pgxy,pgxz,pgyy,pgyz,pgzz; - for( n = 0; n <= n_tot-1; n++) - { -// need round off always - i = int(n/N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - for(int lp=0;lp myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - Nmin = myrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - // theta part - double costheta, thetap; - double cosmphi, sinmphi; - - int i, j; - int lpsy = 0; - if (Symmetry == 0) - lpsy = 1; - else if (Symmetry == 1) - lpsy = 2; - else if (Symmetry == 2) - lpsy = 8; - - double psi4RR, psi4II; - double px, py, pz; - double pEx, pEy, pEz, pBx, pBy, pBz; - double pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - int countlm = 0; - for (int pl = spinw; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - for (int lp = 0; lp < 
lpsy; lp++) - { - px = pox[0][n]; - py = pox[1][n]; - pz = pox[2][n]; - pEx = shellf[InList * n]; - pEy = shellf[InList * n + 1]; - pEz = shellf[InList * n + 2]; - pBx = shellf[InList * n + 3]; - pBy = shellf[InList * n + 4]; - pBz = shellf[InList * n + 5]; - pchi = shellf[InList * n + 6]; - pgxx = shellf[InList * n + 7]; - pgxy = shellf[InList * n + 8]; - pgxz = shellf[InList * n + 9]; - pgyy = shellf[InList * n + 10]; - pgyz = shellf[InList * n + 11]; - pgzz = shellf[InList * n + 12]; - switch (lp) - { - case 0: //+++ (theta, phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - break; - case 1: //++- (pi-theta, phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - pz = -pz; - pEz = -pEz; - pBx = -pBx; - pBy = -pBy; - pgxz = -pgxz; - pgyz = -pgyz; - break; - case 2: //+-+ (theta, 2*pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - py = -py; - pEy = -pEy; - pBx = -pBx; - pBz = -pBz; - pgxy = -pgxy; - pgyz = -pgyz; - break; - case 3: //+-- (pi-theta, 2*pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - py = -py; - pz = -pz; - pEz = -pEz; - pBz = -pBz; - pgxz = -pgxz; - pEy = -pEy; - pBy = -pBy; - pgxy = -pgxy; - break; - case 4: //-++ (theta, pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - px = -px; - pEx = -pEx; - pBy = -pBy; - pBz = -pBz; - pgxy = -pgxy; - pgxz = -pgxz; - break; - case 5: //-+- (pi-theta, pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - pz = -pz; - px = -px; - pEz = -pEz; - pBz = -pBz; - pgyz = -pgyz; - pEx = -pEx; - pBx = -pBx; - pgxy = -pgxy; - break; - case 6: //--+ (theta, pi+phi) - costheta = arcostheta[i]; - cosmphi = 
cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - px = -px; - py = -py; - pEx = -pEx; - pBx = -pBx; - pgxz = -pgxz; - pEy = -pEy; - pBy = -pBy; - pgyz = -pgyz; - break; - case 7: //--- (pi-theta, pi+phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - px = -px; - py = -py; - pz = -pz; - pEx = -pEx; - pEy = -pEy; - pEz = -pEz; - } - - funcs(px, py, pz, pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz, pEx, pEy, pEz, pBx, pBy, pBz, - psi4RR, psi4II); - thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 - - // find back the one - pchi = pchi + 1; -#ifdef GaussInt - // wtcostheta is even function respect costheta - RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; - IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; -#else - RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi); - IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi); -#endif - } - countlm++; // no sanity check for countlm and NN which should be noted in the input parameters - } - } -#endif - - for (int ii = 0; ii < NN; ii++) - { -#ifdef GaussInt - RP_out[ii] = RP_out[ii] * rex * dphi; - IP_out[ii] = IP_out[ii] * rex * dphi; -#else - RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; - IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; -#endif - } - //|------+ Communicate and sum the results from each processor. 
- - { - double *RPIP_out = new double[2 * NN]; - double *RPIP = new double[2 * NN]; - memcpy(RPIP_out, RP_out, NN * sizeof(double)); - memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); - MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - memcpy(RP, RPIP, NN * sizeof(double)); - memcpy(IP, RPIP + NN, NN * sizeof(double)); - delete[] RPIP_out; - delete[] RPIP; - } - - //|------= Free memory. - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - delete[] RP_out; - delete[] IP_out; - DG_List->clearList(); -} -//|---------------------------------------------------------------- -// for box -// for EM wave specially symmetric case -// unify for phi1 and phi2 -//|---------------------------------------------------------------- -void surface_integral::surf_Wave(double rex, int lev, cgh *GH, - var *Ex, var *Ey, var *Ez, var *Bx, var *By, var *Bz, - var *chi, var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, - int spinw, int maxl, int NN, double *RP, double *IP, - monitor *Monitor, - void (*funcs)(double &, double &, double &, - double &, double &, double &, double &, double &, double &, double &, - double &, double &, double &, double &, double &, double &, - double &, double &)) // NN is the length of RP and IP -{ - const int InList = 13; - - MyList *DG_List = new MyList(Ex); - DG_List->insert(Ey); - DG_List->insert(Ez); - DG_List->insert(Bx); - DG_List->insert(By); - DG_List->insert(Bz); - DG_List->insert(chi); - DG_List->insert(gxx); - DG_List->insert(gxy); - DG_List->insert(gxz); - DG_List->insert(gyy); - DG_List->insert(gyz); - DG_List->insert(gzz); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); - - double 
*RP_out, *IP_out; - RP_out = new double[NN]; - IP_out = new double[NN]; - - for (int ii = 0; ii < NN; ii++) - { - RP_out[ii] = 0; - IP_out[ii] = 0; - } - -#if 0 -// for debug - if(myrank==0) - { - double costheta, thetap; - double cosmphi,sinmphi; - - int i,j; - int lpsy=0; - if( Symmetry == 0 ) lpsy=1; - else if( Symmetry == 1 ) lpsy=2; - else if( Symmetry == 2 ) lpsy=8; - - double psi4RR,psi4II; - double px,py,pz; - double pEx,pEy,pEz,pBx,pBy,pBz; - double pchi,pgxx,pgxy,pgxz,pgyy,pgyz,pgzz; - for( n = 0; n <= n_tot-1; n++) - { -// need round off always - i = int(n/N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - for(int lp=0;lp myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - Nmin = myrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - // theta part - double costheta, thetap; - double cosmphi, sinmphi; - - int i, j; - int lpsy = 0; - if (Symmetry == 0) - lpsy = 1; - else if (Symmetry == 1) - lpsy = 2; - else if (Symmetry == 2) - lpsy = 8; - - double psi4RR, psi4II; - double px, py, pz; - double pEx, pEy, pEz, pBx, pBy, pBz; - double pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - int countlm = 0; - for (int pl = spinw; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - for (int lp = 0; lp < lpsy; lp++) - { - px = pox[0][n]; - py = pox[1][n]; - pz = pox[2][n]; - pEx = shellf[InList * n]; - pEy = shellf[InList * n + 1]; - pEz = shellf[InList * n + 2]; - pBx = shellf[InList * n + 3]; - pBy = shellf[InList * n + 4]; - pBz = shellf[InList * n + 5]; - pchi = shellf[InList * n + 6]; - pgxx = shellf[InList * n + 7]; - pgxy = shellf[InList * n + 8]; - pgxz = shellf[InList * n + 9]; - pgyy = shellf[InList * n + 10]; - pgyz = shellf[InList * n + 11]; - pgzz = shellf[InList * n + 12]; - switch (lp) - { - case 0: //+++ (theta, phi) - costheta = arcostheta[i]; - cosmphi = 
cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - break; - case 1: //++- (pi-theta, phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - pz = -pz; - pEz = -pEz; - pBx = -pBx; - pBy = -pBy; - pgxz = -pgxz; - pgyz = -pgyz; - break; - case 2: //+-+ (theta, 2*pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - py = -py; - pEy = -pEy; - pBx = -pBx; - pBz = -pBz; - pgxy = -pgxy; - pgyz = -pgyz; - break; - case 3: //+-- (pi-theta, 2*pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - py = -py; - pz = -pz; - pEz = -pEz; - pBz = -pBz; - pgxz = -pgxz; - pEy = -pEy; - pBy = -pBy; - pgxy = -pgxy; - break; - case 4: //-++ (theta, pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - px = -px; - pEx = -pEx; - pBy = -pBy; - pBz = -pBz; - pgxy = -pgxy; - pgxz = -pgxz; - break; - case 5: //-+- (pi-theta, pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - pz = -pz; - px = -px; - pEz = -pEz; - pBz = -pBz; - pgyz = -pgyz; - pEx = -pEx; - pBx = -pBx; - pgxy = -pgxy; - break; - case 6: //--+ (theta, pi+phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - px = -px; - py = -py; - pEx = -pEx; - pBx = -pBx; - pgxz = -pgxz; - pEy = -pEy; - pBy = -pBy; - pgyz = -pgyz; - break; - case 7: //--- (pi-theta, pi+phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - px = -px; - py = -py; - pz = -pz; - pEx = -pEx; - pEy = -pEy; - pEz = -pEz; - } - - funcs(px, py, pz, pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz, pEx, pEy, pEz, pBx, pBy, pBz, - psi4RR, psi4II); - thetap = sqrt((2 * pl 
+ 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 - - // find back the one - pchi = pchi + 1; -#ifdef GaussInt - // wtcostheta is even function respect costheta - RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; - IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; -#else - RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi); - IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi); -#endif - } - countlm++; // no sanity check for countlm and NN which should be noted in the input parameters - } - } -#endif - - for (int ii = 0; ii < NN; ii++) - { -#ifdef GaussInt - RP_out[ii] = RP_out[ii] * rex * dphi; - IP_out[ii] = IP_out[ii] * rex * dphi; -#else - RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; - IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; -#endif - } - //|------+ Communicate and sum the results from each processor. - - { - double *RPIP_out = new double[2 * NN]; - double *RPIP = new double[2 * NN]; - memcpy(RPIP_out, RP_out, NN * sizeof(double)); - memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); - MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - memcpy(RP, RPIP, NN * sizeof(double)); - memcpy(IP, RPIP + NN, NN * sizeof(double)); - delete[] RPIP_out; - delete[] RPIP; - } - - //|------= Free memory. 
- - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - delete[] RP_out; - delete[] IP_out; - DG_List->clearList(); -} -//|---------------------------------------------------------------- -// for null shell patch2 -//|---------------------------------------------------------------- -// rex is x instead of r -void surface_integral::surf_Wave(double rex, int lev, NullShellPatch2 *GH, var *Rpsi4, var *Ipsi4, - int spinw, int maxl, int NN, double *RP, double *IP, - monitor *Monitor) // NN is the length of RP and IP -// spinw 0 for scalar; 1 for electricmagnetic wave; 2 for gravitaitonal wave -// we always assume spinw >= 0 -{ - const int InList = 2; - - MyList *DG_List = new MyList(Rpsi4); - DG_List->insert(Ipsi4); - - int n; - // since we used x instead of r, these global coordinates are fake - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - GH->Interp_Points_2D(DG_List, n_tot, pox, shellf, Symmetry); - - int mp, Lp, Nmin, Nmax; - - mp = n_tot / cpusize; - Lp = n_tot - cpusize * mp; - - if (Lp > myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - Nmin = myrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - //|~~~~~> Integrate the dot product of Dphi with the surface normal. 
- - double *RP_out, *IP_out; - RP_out = new double[NN]; - IP_out = new double[NN]; - - for (int ii = 0; ii < NN; ii++) - { - RP_out[ii] = 0; - IP_out[ii] = 0; - } - // theta part - double costheta, thetap; - double cosmphi, sinmphi; - - int i, j; - int lpsy = 0; - if (Symmetry == 0) - lpsy = 1; - else if (Symmetry == 1) - lpsy = 2; - else if (Symmetry == 2) - lpsy = 8; - - double psi4RR, psi4II; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - int countlm = 0; - for (int pl = spinw; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - for (int lp = 0; lp < lpsy; lp++) - { - switch (lp) - { - case 0: //+++ (theta, phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = shellf[InList * n + 1]; - break; - case 1: //++- (pi-theta, phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = -shellf[InList * n + 1]; - break; - case 2: //+-+ (theta, 2*pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = -shellf[InList * n + 1]; - break; - case 3: //+-- (pi-theta, 2*pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = shellf[InList * n + 1]; - break; - case 4: //-++ (theta, pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = shellf[InList * n]; - psi4II = -shellf[InList * n + 1]; - break; - case 5: //-+- (pi-theta, pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = shellf[InList * n]; - 
psi4II = shellf[InList * n + 1]; - break; - case 6: //--+ (theta, pi+phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = shellf[InList * n]; - psi4II = shellf[InList * n + 1]; - break; - case 7: //--- (pi-theta, pi+phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = shellf[InList * n]; - psi4II = -shellf[InList * n + 1]; - } - - thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 - // based on Eq.(41) of PRD 77, 024027 (2008) -#ifdef GaussInt - // wtcostheta is even function respect costheta - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; -#else - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); // + is because \bar of \bar{Y^s_lm} in Eq.(40) - // of PRD 77, 024027 (2008) - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi); -#endif - } - countlm++; // no sanity check for countlm and NN which should be noted in the input parameters - } - } - - for (int ii = 0; ii < NN; ii++) - { -// do not need multiply with rex for null shell -#ifdef GaussInt - RP_out[ii] = RP_out[ii] * dphi; - IP_out[ii] = IP_out[ii] * dphi; -#else - RP_out[ii] = RP_out[ii] * dphi * dcostheta; - IP_out[ii] = IP_out[ii] * dphi * dcostheta; -#endif - } - //|------+ Communicate and sum the results from each processor. 
- - { - double *RPIP_out = new double[2 * NN]; - double *RPIP = new double[2 * NN]; - memcpy(RPIP_out, RP_out, NN * sizeof(double)); - memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); - MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - memcpy(RP, RPIP, NN * sizeof(double)); - memcpy(IP, RPIP + NN, NN * sizeof(double)); - delete[] RPIP_out; - delete[] RPIP; - } - - //|------= Free memory. - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - delete[] RP_out; - delete[] IP_out; - DG_List->clearList(); -} -//|---------------------------------------------------------------- -// for null shell patch -//|---------------------------------------------------------------- -// rex is x instead of r -void surface_integral::surf_Wave(double rex, int lev, NullShellPatch *GH, var *Rpsi4, var *Ipsi4, - int spinw, int maxl, int NN, double *RP, double *IP, - monitor *Monitor) // NN is the length of RP and IP -// spinw 0 for scalar; 1 for electricmagnetic wave; 2 for gravitaitonal wave -// we always assume spinw >= 0 -{ - const int InList = 2; - - MyList *DG_List = new MyList(Rpsi4); - DG_List->insert(Ipsi4); - - int n; - // since we used x instead of r, these global coordinates are fake - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - GH->Interp_Points_2D(DG_List, n_tot, pox, shellf, Symmetry); - - int mp, Lp, Nmin, Nmax; - - mp = n_tot / cpusize; - Lp = n_tot - cpusize * mp; - - if (Lp > myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - Nmin = myrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - //|~~~~~> Integrate the dot product of Dphi with the surface normal. 
- - double *RP_out, *IP_out; - RP_out = new double[NN]; - IP_out = new double[NN]; - - for (int ii = 0; ii < NN; ii++) - { - RP_out[ii] = 0; - IP_out[ii] = 0; - } - // theta part - double costheta, thetap; - double cosmphi, sinmphi; - - int i, j; - int lpsy = 0; - if (Symmetry == 0) - lpsy = 1; - else if (Symmetry == 1) - lpsy = 2; - else if (Symmetry == 2) - lpsy = 8; - - double psi4RR, psi4II; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - int countlm = 0; - for (int pl = spinw; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - for (int lp = 0; lp < lpsy; lp++) - { - switch (lp) - { - case 0: //+++ (theta, phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = shellf[InList * n + 1]; - break; - case 1: //++- (pi-theta, phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = -shellf[InList * n + 1]; - break; - case 2: //+-+ (theta, 2*pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = -shellf[InList * n + 1]; - break; - case 3: //+-- (pi-theta, 2*pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - psi4RR = shellf[InList * n]; - psi4II = shellf[InList * n + 1]; - break; - case 4: //-++ (theta, pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = shellf[InList * n]; - psi4II = -shellf[InList * n + 1]; - break; - case 5: //-+- (pi-theta, pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - psi4RR = shellf[InList * n]; - 
psi4II = shellf[InList * n + 1]; - break; - case 6: //--+ (theta, pi+phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = shellf[InList * n]; - psi4II = shellf[InList * n + 1]; - break; - case 7: //--- (pi-theta, pi+phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - psi4RR = shellf[InList * n]; - psi4II = -shellf[InList * n + 1]; - } - - thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 - // based on Eq.(41) of PRD 77, 024027 (2008) -#ifdef GaussInt - // wtcostheta is even function respect costheta - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; -#else - RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); // + is because \bar of \bar{Y^s_lm} in Eq.(40) - // of PRD 77, 024027 (2008) - IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi); -#endif - } - countlm++; // no sanity check for countlm and NN which should be noted in the input parameters - } - } - - for (int ii = 0; ii < NN; ii++) - { -// do not need multiply with rex for null shell -#ifdef GaussInt - RP_out[ii] = RP_out[ii] * dphi; - IP_out[ii] = IP_out[ii] * dphi; -#else - RP_out[ii] = RP_out[ii] * dphi * dcostheta; - IP_out[ii] = IP_out[ii] * dphi * dcostheta; -#endif - } - //|------+ Communicate and sum the results from each processor. 
- - { - double *RPIP_out = new double[2 * NN]; - double *RPIP = new double[2 * NN]; - memcpy(RPIP_out, RP_out, NN * sizeof(double)); - memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); - MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - memcpy(RP, RPIP, NN * sizeof(double)); - memcpy(IP, RPIP + NN, NN * sizeof(double)); - delete[] RPIP_out; - delete[] RPIP; - } - - //|------= Free memory. - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - delete[] RP_out; - delete[] IP_out; - DG_List->clearList(); -} -//|---------------------------------------------------- -//| -//| ADM mass, linear momentum and angular momentum -//| -//|---------------------------------------------------- -void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK, - var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, - var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz, - var *Gmx, var *Gmy, var *Gmz, - var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i - double *Rout, monitor *Monitor) -{ - if (myrank == 0 && GH->grids[lev] != 1) - if (Monitor && Monitor->outfile) - Monitor->outfile << "WARNING: surface integral on multipatches" << endl; - else - cout << "WARNING: surface integral on multipatches" << endl; - - double mass, px, py, pz, sx, sy, sz; - - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_admmass_bssn(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[chi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], 
cg->fgfs[Sfz_rhs->sgfn], - Symmetry); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - const int InList = 17; - - MyList *DG_List = new MyList(Sfx_rhs); - DG_List->insert(Sfy_rhs); - DG_List->insert(Sfz_rhs); - DG_List->insert(chi); - DG_List->insert(trK); - DG_List->insert(gxx); - DG_List->insert(gxy); - DG_List->insert(gxz); - DG_List->insert(gyy); - DG_List->insert(gyz); - DG_List->insert(gzz); - DG_List->insert(Axx); - DG_List->insert(Axy); - DG_List->insert(Axz); - DG_List->insert(Ayy); - DG_List->insert(Ayz); - DG_List->insert(Azz); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - int mp, Lp, Nmin, Nmax; - mp = n_tot / cpusize; - Lp = n_tot - cpusize * mp; - if (Lp > myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - Nmin = myrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - // we have assumed there is only one box on this level, - // so we do not need loop boxes - GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax); - - double Mass_out = 0; - double ang_outx, ang_outy, ang_outz; - double p_outx, p_outy, p_outz; - ang_outx = ang_outy = ang_outz = 0.0; - p_outx = p_outy = p_outz = 0.0; - const double f1o8 = 0.125; - - double Chi, Psi; - double Gxx, Gxy, Gxz, Gyy, Gyz, Gzz; - double gupxx, gupxy, gupxz, gupyy, gupyz, gupzz; - double TRK, axx, axy, axz, ayy, ayz, azz; - double aupxx, aupxy, aupxz, aupyx, aupyy, aupyz, aupzx, aupzy, aupzz; - int i; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - - Chi = shellf[InList * n + 3]; // chi in fact - TRK = shellf[InList * n + 4]; - Gxx = shellf[InList * n + 5] + 1.0; - Gxy = shellf[InList * n + 6]; - Gxz = shellf[InList * n + 
7]; - Gyy = shellf[InList * n + 8] + 1.0; - Gyz = shellf[InList * n + 9]; - Gzz = shellf[InList * n + 10] + 1.0; - axx = shellf[InList * n + 11]; - axy = shellf[InList * n + 12]; - axz = shellf[InList * n + 13]; - ayy = shellf[InList * n + 14]; - ayz = shellf[InList * n + 15]; - azz = shellf[InList * n + 16]; - - Chi = 1.0 / (1.0 + Chi); // exp(4*phi) - Psi = Chi * sqrt(Chi); // Psi^6 - -// Chi^2 corresponds to metric determinant -// but this factor has been considered in f_admmass_bssn -#ifdef GaussInt - // wtcostheta is even function respect costheta - Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + shellf[InList * n + 2] * nz_g[n]) * wtcostheta[i]; -#else - Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + shellf[InList * n + 2] * nz_g[n]); -#endif - - gupzz = Gxx * Gyy * Gzz + Gxy * Gyz * Gxz + Gxz * Gxy * Gyz - - Gxz * Gyy * Gxz - Gxy * Gxy * Gzz - Gxx * Gyz * Gyz; - gupxx = (Gyy * Gzz - Gyz * Gyz) / gupzz; - gupxy = -(Gxy * Gzz - Gyz * Gxz) / gupzz; - gupxz = (Gxy * Gyz - Gyy * Gxz) / gupzz; - gupyy = (Gxx * Gzz - Gxz * Gxz) / gupzz; - gupyz = -(Gxx * Gyz - Gxy * Gxz) / gupzz; - gupzz = (Gxx * Gyy - Gxy * Gxy) / gupzz; - - aupxx = gupxx * axx + gupxy * axy + gupxz * axz; - aupxy = gupxx * axy + gupxy * ayy + gupxz * ayz; - aupxz = gupxx * axz + gupxy * ayz + gupxz * azz; - aupyx = gupxy * axx + gupyy * axy + gupyz * axz; - aupyy = gupxy * axy + gupyy * ayy + gupyz * ayz; - aupyz = gupxy * axz + gupyy * ayz + gupyz * azz; - aupzx = gupxz * axx + gupyz * axy + gupzz * axz; - aupzy = gupxz * axy + gupyz * ayy + gupzz * ayz; - aupzz = gupxz * axz + gupyz * ayz + gupzz * azz; - if (Symmetry == 0) - { -#ifdef GaussInt - // wtcostheta is even function respect costheta - // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m - ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - 
pox[2][n] * aupzy)) * wtcostheta[i]; - // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m - ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)) * wtcostheta[i]; - // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; -#else - // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m - ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - pox[2][n] * aupzy)); - // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m - ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)); - // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); -#endif - } - else if (Symmetry == 1) - { -#ifdef GaussInt - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; -#else - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); -#endif - } - - axx = Chi * (axx + Gxx * TRK / 3.0); - axy = Chi * (axy + Gxy * TRK / 3.0); - axz = Chi * (axz + Gxz * TRK / 3.0); - ayy = Chi * (ayy + Gyy * TRK / 3.0); - ayz = Chi * (ayz + Gyz * TRK / 3.0); - azz = Chi * (azz + Gzz * TRK / 
3.0); - - axx = axx - TRK; - ayy = ayy - TRK; - azz = azz - TRK; - - // 1/8\pi \int \psi^6 (K_mi - \delta_mi trK) dS^m: lower index linear momentum - if (Symmetry == 0) - { -#ifdef GaussInt - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; - p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz) * wtcostheta[i]; -#else - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); - p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz); -#endif - } - else if (Symmetry == 1) - { -#ifdef GaussInt - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; -#else - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); -#endif - } - } - - { - double scalar_out[7] = {Mass_out, ang_outx, ang_outy, ang_outz, p_outx, p_outy, p_outz}; - double scalar_in[7]; - MPI_Allreduce(scalar_out, scalar_in, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - mass = scalar_in[0]; sx = scalar_in[1]; sy = scalar_in[2]; sz = scalar_in[3]; - px = scalar_in[4]; py = scalar_in[5]; pz = scalar_in[6]; - } - -#ifdef GaussInt - mass = mass * rex * rex * dphi * factor; - - sx = sx * rex * rex * dphi * (1.0 / PI) * factor; - sy = sy * rex * rex * dphi * (1.0 / PI) * factor; - sz = sz * rex * rex * dphi * (1.0 / PI) * factor; - - px = px * rex * rex * dphi * (1.0 / PI) * factor; - py = py * rex * rex * dphi * (1.0 / PI) * factor; - pz = pz * rex * rex * dphi * (1.0 / PI) * factor; -#else - mass = mass * rex * rex * dphi * dcostheta * factor; - - sx = sx * rex * rex * dphi * 
dcostheta * (1.0 / PI) * factor; - sy = sy * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - sz = sz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - - px = px * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - py = py * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - pz = pz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; -#endif - - Rout[0] = mass; - Rout[1] = px; - Rout[2] = py; - Rout[3] = pz; - Rout[4] = sx; - Rout[5] = sy; - Rout[6] = sz; - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - DG_List->clearList(); -} -void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK, - var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, - var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz, - var *Gmx, var *Gmy, var *Gmz, - var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i - double *Rout, monitor *Monitor, MPI_Comm Comm_here) -{ - int lmyrank; - MPI_Comm_rank(Comm_here, &lmyrank); - if (lmyrank == 0 && GH->grids[lev] != 1) - if (Monitor && Monitor->outfile) - Monitor->outfile << "WARNING: surface integral on multipatches" << endl; - else - cout << "WARNING: surface integral on multipatches" << endl; - - double mass, px, py, pz, sx, sy, sz; - - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_admmass_bssn(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[chi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - Symmetry); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } 
- Pp = Pp->next; - } - - const int InList = 17; - - MyList *DG_List = new MyList(Sfx_rhs); - DG_List->insert(Sfy_rhs); - DG_List->insert(Sfz_rhs); - DG_List->insert(chi); - DG_List->insert(trK); - DG_List->insert(gxx); - DG_List->insert(gxy); - DG_List->insert(gxz); - DG_List->insert(gyy); - DG_List->insert(gyz); - DG_List->insert(gzz); - DG_List->insert(Axx); - DG_List->insert(Axy); - DG_List->insert(Axz); - DG_List->insert(Ayy); - DG_List->insert(Ayz); - DG_List->insert(Azz); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - // we have assumed there is only one box on this level, - // so we do not need loop boxes - GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Comm_here); - - double Mass_out = 0; - double ang_outx, ang_outy, ang_outz; - double p_outx, p_outy, p_outz; - ang_outx = ang_outy = ang_outz = 0.0; - p_outx = p_outy = p_outz = 0.0; - const double f1o8 = 0.125; - - int mp, Lp, Nmin, Nmax; - - int cpusize_here; - MPI_Comm_size(Comm_here, &cpusize_here); - - mp = n_tot / cpusize_here; - Lp = n_tot - cpusize_here * mp; - - if (Lp > lmyrank) - { - Nmin = lmyrank * mp + lmyrank; - Nmax = Nmin + mp; - } - else - { - Nmin = lmyrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - double Chi, Psi; - double Gxx, Gxy, Gxz, Gyy, Gyz, Gzz; - double gupxx, gupxy, gupxz, gupyy, gupyz, gupzz; - double TRK, axx, axy, axz, ayy, ayz, azz; - double aupxx, aupxy, aupxz, aupyx, aupyy, aupyz, aupzx, aupzy, aupzz; - int i; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - - Chi = shellf[InList * n + 3]; // chi in fact - TRK = shellf[InList * n + 4]; - Gxx = shellf[InList * n + 5] + 1.0; - Gxy = shellf[InList * n + 6]; - Gxz = shellf[InList * n + 7]; - Gyy = 
shellf[InList * n + 8] + 1.0; - Gyz = shellf[InList * n + 9]; - Gzz = shellf[InList * n + 10] + 1.0; - axx = shellf[InList * n + 11]; - axy = shellf[InList * n + 12]; - axz = shellf[InList * n + 13]; - ayy = shellf[InList * n + 14]; - ayz = shellf[InList * n + 15]; - azz = shellf[InList * n + 16]; - - Chi = 1.0 / (1.0 + Chi); // exp(4*phi) - Psi = Chi * sqrt(Chi); // Psi^6 - -// Chi^2 corresponds to metric determinant -// but this factor has been considered in f_admmass_bssn -#ifdef GaussInt - // wtcostheta is even function respect costheta - Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + shellf[InList * n + 2] * nz_g[n]) * wtcostheta[i]; -#else - Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + shellf[InList * n + 2] * nz_g[n]); -#endif - - gupzz = Gxx * Gyy * Gzz + Gxy * Gyz * Gxz + Gxz * Gxy * Gyz - - Gxz * Gyy * Gxz - Gxy * Gxy * Gzz - Gxx * Gyz * Gyz; - gupxx = (Gyy * Gzz - Gyz * Gyz) / gupzz; - gupxy = -(Gxy * Gzz - Gyz * Gxz) / gupzz; - gupxz = (Gxy * Gyz - Gyy * Gxz) / gupzz; - gupyy = (Gxx * Gzz - Gxz * Gxz) / gupzz; - gupyz = -(Gxx * Gyz - Gxy * Gxz) / gupzz; - gupzz = (Gxx * Gyy - Gxy * Gxy) / gupzz; - - aupxx = gupxx * axx + gupxy * axy + gupxz * axz; - aupxy = gupxx * axy + gupxy * ayy + gupxz * ayz; - aupxz = gupxx * axz + gupxy * ayz + gupxz * azz; - aupyx = gupxy * axx + gupyy * axy + gupyz * axz; - aupyy = gupxy * axy + gupyy * ayy + gupyz * ayz; - aupyz = gupxy * axz + gupyy * ayz + gupyz * azz; - aupzx = gupxz * axx + gupyz * axy + gupzz * axz; - aupzy = gupxz * axy + gupyz * ayy + gupzz * ayz; - aupzz = gupxz * axz + gupyz * ayz + gupzz * azz; - if (Symmetry == 0) - { -#ifdef GaussInt - // wtcostheta is even function respect costheta - // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m - ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - pox[2][n] * 
aupzy)) * wtcostheta[i]; - // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m - ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)) * wtcostheta[i]; - // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; -#else - // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m - ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - pox[2][n] * aupzy)); - // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m - ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)); - // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); -#endif - } - else if (Symmetry == 1) - { -#ifdef GaussInt - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; -#else - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); -#endif - } - - axx = Chi * (axx + Gxx * TRK / 3.0); - axy = Chi * (axy + Gxy * TRK / 3.0); - axz = Chi * (axz + Gxz * TRK / 3.0); - ayy = Chi * (ayy + Gyy * TRK / 3.0); - ayz = Chi * (ayz + Gyz * TRK / 3.0); - azz = Chi * (azz + Gzz * TRK / 3.0); - - axx 
= axx - TRK; - ayy = ayy - TRK; - azz = azz - TRK; - - // 1/8\pi \int \psi^6 (K_mi - \delta_mi trK) dS^m: lower index linear momentum - if (Symmetry == 0) - { -#ifdef GaussInt - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; - p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz) * wtcostheta[i]; -#else - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); - p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz); -#endif - } - else if (Symmetry == 1) - { -#ifdef GaussInt - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; -#else - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); -#endif - } - } - - { - double scalar_out[7] = {Mass_out, ang_outx, ang_outy, ang_outz, p_outx, p_outy, p_outz}; - double scalar_in[7]; - MPI_Allreduce(scalar_out, scalar_in, 7, MPI_DOUBLE, MPI_SUM, Comm_here); - mass = scalar_in[0]; sx = scalar_in[1]; sy = scalar_in[2]; sz = scalar_in[3]; - px = scalar_in[4]; py = scalar_in[5]; pz = scalar_in[6]; - } - -#ifdef GaussInt - mass = mass * rex * rex * dphi * factor; - - sx = sx * rex * rex * dphi * (1.0 / PI) * factor; - sy = sy * rex * rex * dphi * (1.0 / PI) * factor; - sz = sz * rex * rex * dphi * (1.0 / PI) * factor; - - px = px * rex * rex * dphi * (1.0 / PI) * factor; - py = py * rex * rex * dphi * (1.0 / PI) * factor; - pz = pz * rex * rex * dphi * (1.0 / PI) * factor; -#else - mass = mass * rex * rex * dphi * dcostheta * factor; - - sx = sx * rex * rex * dphi * dcostheta * (1.0 / PI) 
* factor; - sy = sy * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - sz = sz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - - px = px * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - py = py * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - pz = pz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; -#endif - - Rout[0] = mass; - Rout[1] = px; - Rout[2] = py; - Rout[3] = pz; - Rout[4] = sx; - Rout[5] = sy; - Rout[6] = sz; - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - DG_List->clearList(); -} -//|---------------------------------------------------------------- -// for shell patch -//|---------------------------------------------------------------- -void surface_integral::surf_MassPAng(double rex, int lev, ShellPatch *GH, var *chi, var *trK, - var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, - var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz, - var *Gmx, var *Gmy, var *Gmz, - var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i - double *Rout, monitor *Monitor) -{ - if (lev != 0) - { - if (myrank == 0) - { - if (Monitor && Monitor->outfile) - Monitor->outfile << "WARNING: shell surface integral not on level 0" << endl; - else - cout << "WARNING: shell surface integral not on level 0" << endl; - } - return; - } - - double mass, px, py, pz, sx, sy, sz; - - MyList *Pp = GH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - int fngfs = Pp->data->fngfs; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_admmass_bssn_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], cg->fgfs[fngfs + ShellPatch::gy], cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], cg->fgfs[fngfs + ShellPatch::drhody], cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], cg->fgfs[fngfs + ShellPatch::dsigmady], cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], 
cg->fgfs[fngfs + ShellPatch::dRdy], cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], cg->fgfs[fngfs + ShellPatch::drhodxy], cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], cg->fgfs[fngfs + ShellPatch::drhodyz], cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], cg->fgfs[fngfs + ShellPatch::dsigmadxy], cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], cg->fgfs[fngfs + ShellPatch::dsigmadyz], cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], cg->fgfs[fngfs + ShellPatch::dRdxy], cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], cg->fgfs[fngfs + ShellPatch::dRdyz], cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[chi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - Symmetry, Pp->data->sst); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - - const int InList = 17; - - MyList *DG_List = new MyList(Sfx_rhs); - DG_List->insert(Sfy_rhs); - DG_List->insert(Sfz_rhs); - DG_List->insert(chi); - DG_List->insert(trK); - DG_List->insert(gxx); - DG_List->insert(gxy); - DG_List->insert(gxz); - DG_List->insert(gyy); - DG_List->insert(gyz); - DG_List->insert(gzz); - DG_List->insert(Axx); - DG_List->insert(Axy); - DG_List->insert(Axz); - DG_List->insert(Ayy); - DG_List->insert(Ayz); - DG_List->insert(Azz); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex 
* nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - // we have assumed there is only one box on this level, - // so we do not need loop boxes - GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); - - double Mass_out = 0; - double ang_outx, ang_outy, ang_outz; - double p_outx, p_outy, p_outz; - ang_outx = ang_outy = ang_outz = 0.0; - p_outx = p_outy = p_outz = 0.0; - const double f1o8 = 0.125; - - int mp, Lp, Nmin, Nmax; - - mp = n_tot / cpusize; - Lp = n_tot - cpusize * mp; - - if (Lp > myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - Nmin = myrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - double Chi, Psi; - double Gxx, Gxy, Gxz, Gyy, Gyz, Gzz; - double gupxx, gupxy, gupxz, gupyy, gupyz, gupzz; - double TRK, axx, axy, axz, ayy, ayz, azz; - double aupxx, aupxy, aupxz, aupyx, aupyy, aupyz, aupzx, aupzy, aupzz; - int i; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - - Chi = shellf[InList * n + 3]; // chi in fact - TRK = shellf[InList * n + 4]; - Gxx = shellf[InList * n + 5] + 1.0; - Gxy = shellf[InList * n + 6]; - Gxz = shellf[InList * n + 7]; - Gyy = shellf[InList * n + 8] + 1.0; - Gyz = shellf[InList * n + 9]; - Gzz = shellf[InList * n + 10] + 1.0; - axx = shellf[InList * n + 11]; - axy = shellf[InList * n + 12]; - axz = shellf[InList * n + 13]; - ayy = shellf[InList * n + 14]; - ayz = shellf[InList * n + 15]; - azz = shellf[InList * n + 16]; - - Chi = 1.0 / (1.0 + Chi); // exp(4*phi) - Psi = Chi * sqrt(Chi); // Psi^6 -// Chi^2 corresponds to metric determinant -// but this factor has been considered in f_admmass_bssn -#ifdef GaussInt - // wtcostheta is even function respect costheta - Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + shellf[InList * n + 2] * nz_g[n]) * wtcostheta[i]; -#else - Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + 
shellf[InList * n + 2] * nz_g[n]); -#endif - - gupzz = Gxx * Gyy * Gzz + Gxy * Gyz * Gxz + Gxz * Gxy * Gyz - - Gxz * Gyy * Gxz - Gxy * Gxy * Gzz - Gxx * Gyz * Gyz; - gupxx = (Gyy * Gzz - Gyz * Gyz) / gupzz; - gupxy = -(Gxy * Gzz - Gyz * Gxz) / gupzz; - gupxz = (Gxy * Gyz - Gyy * Gxz) / gupzz; - gupyy = (Gxx * Gzz - Gxz * Gxz) / gupzz; - gupyz = -(Gxx * Gyz - Gxy * Gxz) / gupzz; - gupzz = (Gxx * Gyy - Gxy * Gxy) / gupzz; - - aupxx = gupxx * axx + gupxy * axy + gupxz * axz; - aupxy = gupxx * axy + gupxy * ayy + gupxz * ayz; - aupxz = gupxx * axz + gupxy * ayz + gupxz * azz; - aupyx = gupxy * axx + gupyy * axy + gupyz * axz; - aupyy = gupxy * axy + gupyy * ayy + gupyz * ayz; - aupyz = gupxy * axz + gupyy * ayz + gupyz * azz; - aupzx = gupxz * axx + gupyz * axy + gupzz * axz; - aupzy = gupxz * axy + gupyz * ayy + gupzz * ayz; - aupzz = gupxz * axz + gupyz * ayz + gupzz * azz; - if (Symmetry == 0) - { -#ifdef GaussInt - // wtcostheta is even function respect costheta - // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m - ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - pox[2][n] * aupzy)) * wtcostheta[i]; - // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m - ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)) * wtcostheta[i]; - // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; -#else - // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m - ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - pox[2][n] * aupzy)); 
- // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m - ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)); - // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); -#endif - } - else if (Symmetry == 1) - { -#ifdef GaussInt - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; -#else - ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); -#endif - } - - axx = Chi * (axx + Gxx * TRK / 3.0); - axy = Chi * (axy + Gxy * TRK / 3.0); - axz = Chi * (axz + Gxz * TRK / 3.0); - ayy = Chi * (ayy + Gyy * TRK / 3.0); - ayz = Chi * (ayz + Gyz * TRK / 3.0); - azz = Chi * (azz + Gzz * TRK / 3.0); - - axx = axx - TRK; - ayy = ayy - TRK; - azz = azz - TRK; - - // 1/8\pi \int \psi^6 (K_mi - \delta_mi trK) dS^m: lower index linear momentum - if (Symmetry == 0) - { -#ifdef GaussInt - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; - p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz) * wtcostheta[i]; -#else - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); - p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz); -#endif - } - else if (Symmetry == 1) - { -#ifdef GaussInt - 
p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; -#else - p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); - p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); -#endif - } - } - - { - double scalar_out[7] = {Mass_out, ang_outx, ang_outy, ang_outz, p_outx, p_outy, p_outz}; - double scalar_in[7]; - MPI_Allreduce(scalar_out, scalar_in, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - mass = scalar_in[0]; sx = scalar_in[1]; sy = scalar_in[2]; sz = scalar_in[3]; - px = scalar_in[4]; py = scalar_in[5]; pz = scalar_in[6]; - } - -#ifdef GaussInt - mass = mass * rex * rex * dphi * factor; - - sx = sx * rex * rex * dphi * (1.0 / PI) * factor; - sy = sy * rex * rex * dphi * (1.0 / PI) * factor; - sz = sz * rex * rex * dphi * (1.0 / PI) * factor; - - px = px * rex * rex * dphi * (1.0 / PI) * factor; - py = py * rex * rex * dphi * (1.0 / PI) * factor; - pz = pz * rex * rex * dphi * (1.0 / PI) * factor; -#else - mass = mass * rex * rex * dphi * dcostheta * factor; - - sx = sx * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - sy = sy * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - sz = sz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - - px = px * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - py = py * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; - pz = pz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; -#endif - - Rout[0] = mass; - Rout[1] = px; - Rout[2] = py; - Rout[3] = pz; - Rout[4] = sx; - Rout[5] = sy; - Rout[6] = sz; - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - DG_List->clearList(); -} -//|---------------------------------------------------------------- -// do not discriminate box and shell -// for Gravitational wave specially symmetric case 
-//|---------------------------------------------------------------- -void surface_integral::surf_Wave(double rex, cgh *GH, ShellPatch *SH, - var *chi, var *trK, - var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, - var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz, - var *chix, var *chiy, var *chiz, - var *trKx, var *trKy, var *trKz, - var *Axxx, var *Axxy, var *Axxz, - var *Axyx, var *Axyy, var *Axyz, - var *Axzx, var *Axzy, var *Axzz, - var *Ayyx, var *Ayyy, var *Ayyz, - var *Ayzx, var *Ayzy, var *Ayzz, - var *Azzx, var *Azzy, var *Azzz, - var *Gamxxx, var *Gamxxy, var *Gamxxz, var *Gamxyy, var *Gamxyz, var *Gamxzz, - var *Gamyxx, var *Gamyxy, var *Gamyxz, var *Gamyyy, var *Gamyyz, var *Gamyzz, - var *Gamzxx, var *Gamzxy, var *Gamzxz, var *Gamzyy, var *Gamzyz, var *Gamzzz, - var *Rxx, var *Rxy, var *Rxz, var *Ryy, var *Ryz, var *Rzz, - int spinw, int maxl, int NN, double *RP, double *IP, - monitor *Monitor) // NN is the length of RP and IP -{ - const int InList = 62; - - MyList *DG_List = new MyList(chi); - DG_List->insert(trK); - DG_List->insert(gxx); - DG_List->insert(gxy); - DG_List->insert(gxz); - DG_List->insert(gyy); - DG_List->insert(gyz); - DG_List->insert(gzz); - DG_List->insert(Axx); - DG_List->insert(Axy); - DG_List->insert(Axz); - DG_List->insert(Ayy); - DG_List->insert(Ayz); - DG_List->insert(Azz); - DG_List->insert(chix); - DG_List->insert(chiy); - DG_List->insert(chiz); - DG_List->insert(trKx); - DG_List->insert(trKy); - DG_List->insert(trKz); - DG_List->insert(Axxx); - DG_List->insert(Axxy); - DG_List->insert(Axxz); - DG_List->insert(Axyx); - DG_List->insert(Axyy); - DG_List->insert(Axyz); - DG_List->insert(Axzx); - DG_List->insert(Axzy); - DG_List->insert(Axzz); - DG_List->insert(Ayyx); - DG_List->insert(Ayyy); - DG_List->insert(Ayyz); - DG_List->insert(Ayzx); - DG_List->insert(Ayzy); - DG_List->insert(Ayzz); - DG_List->insert(Azzx); - DG_List->insert(Azzy); - DG_List->insert(Azzz); - DG_List->insert(Gamxxx); - 
DG_List->insert(Gamxxy); - DG_List->insert(Gamxxz); - DG_List->insert(Gamxyy); - DG_List->insert(Gamxyz); - DG_List->insert(Gamxzz); - DG_List->insert(Gamyxx); - DG_List->insert(Gamyxy); - DG_List->insert(Gamyxz); - DG_List->insert(Gamyyy); - DG_List->insert(Gamyyz); - DG_List->insert(Gamyzz); - DG_List->insert(Gamzxx); - DG_List->insert(Gamzxy); - DG_List->insert(Gamzxz); - DG_List->insert(Gamzyy); - DG_List->insert(Gamzyz); - DG_List->insert(Gamzzz); - DG_List->insert(Rxx); - DG_List->insert(Rxy); - DG_List->insert(Rxz); - DG_List->insert(Ryy); - DG_List->insert(Ryz); - DG_List->insert(Rzz); - - int n; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[n_tot]; - for (n = 0; n < n_tot; n++) - { - pox[0][n] = rex * nx_g[n]; - pox[1][n] = rex * ny_g[n]; - pox[2][n] = rex * nz_g[n]; - } - - double *shellf; - shellf = new double[n_tot * InList]; - - SR_Interp_Points(DG_List, GH, SH, n_tot, pox, shellf); - - double *RP_out, *IP_out; - RP_out = new double[NN]; - IP_out = new double[NN]; - - for (int ii = 0; ii < NN; ii++) - { - RP_out[ii] = 0; - IP_out[ii] = 0; - } - - int mp, Lp, Nmin, Nmax; - - mp = n_tot / cpusize; - Lp = n_tot - cpusize * mp; - - if (Lp > myrank) - { - Nmin = myrank * mp + myrank; - Nmax = Nmin + mp; - } - else - { - Nmin = myrank * mp + Lp; - Nmax = Nmin + mp - 1; - } - - // theta part - double costheta, thetap; - double cosmphi, sinmphi; - - int i, j; - int lpsy = 0; - if (Symmetry == 0) - lpsy = 1; - else if (Symmetry == 1) - lpsy = 2; - else if (Symmetry == 2) - lpsy = 8; - - double psi4RR, psi4II; - double px, py, pz; - double pchi, ptrK, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz; - double pAxx, pAxy, pAxz, pAyy, pAyz, pAzz; - double pchix, pchiy, pchiz; - double ptrKx, ptrKy, ptrKz; - double pAxxx, pAxxy, pAxxz; - double pAxyx, pAxyy, pAxyz; - double pAxzx, pAxzy, pAxzz; - double pAyyx, pAyyy, pAyyz; - double pAyzx, pAyzy, pAyzz; - double pAzzx, pAzzy, pAzzz; - double pGamxxx, pGamxxy, pGamxxz, pGamxyy, pGamxyz, pGamxzz; - double 
pGamyxx, pGamyxy, pGamyxz, pGamyyy, pGamyyz, pGamyzz; - double pGamzxx, pGamzxy, pGamzxz, pGamzyy, pGamzyz, pGamzzz; - double pRxx, pRxy, pRxz, pRyy, pRyz, pRzz; - for (n = Nmin; n <= Nmax; n++) - { - // need round off always - i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 - j = n - i * N_phi; - - int countlm = 0; - for (int pl = spinw; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - for (int lp = 0; lp < lpsy; lp++) - { - px = pox[0][n]; - py = pox[1][n]; - pz = pox[2][n]; - pchi = shellf[InList * n]; - ptrK = shellf[InList * n + 1]; - pgxx = shellf[InList * n + 2]; - pgxy = shellf[InList * n + 3]; - pgxz = shellf[InList * n + 4]; - pgyy = shellf[InList * n + 5]; - pgyz = shellf[InList * n + 6]; - pgzz = shellf[InList * n + 7]; - pAxx = shellf[InList * n + 8]; - pAxy = shellf[InList * n + 9]; - pAxz = shellf[InList * n + 10]; - pAyy = shellf[InList * n + 11]; - pAyz = shellf[InList * n + 12]; - pAzz = shellf[InList * n + 13]; - pchix = shellf[InList * n + 14]; - pchiy = shellf[InList * n + 15]; - pchiz = shellf[InList * n + 16]; - ptrKx = shellf[InList * n + 17]; - ptrKy = shellf[InList * n + 18]; - ptrKz = shellf[InList * n + 19]; - pAxxx = shellf[InList * n + 20]; - pAxxy = shellf[InList * n + 21]; - pAxxz = shellf[InList * n + 22]; - pAxyx = shellf[InList * n + 23]; - pAxyy = shellf[InList * n + 24]; - pAxyz = shellf[InList * n + 25]; - pAxzx = shellf[InList * n + 26]; - pAxzy = shellf[InList * n + 27]; - pAxzz = shellf[InList * n + 28]; - pAyyx = shellf[InList * n + 29]; - pAyyy = shellf[InList * n + 30]; - pAyyz = shellf[InList * n + 31]; - pAyzx = shellf[InList * n + 32]; - pAyzy = shellf[InList * n + 33]; - pAyzz = shellf[InList * n + 34]; - pAzzx = shellf[InList * n + 35]; - pAzzy = shellf[InList * n + 36]; - pAzzz = shellf[InList * n + 37]; - pGamxxx = shellf[InList * n + 38]; - pGamxxy = shellf[InList * n + 39]; - pGamxxz = shellf[InList * n + 40]; - pGamxyy = shellf[InList * n + 41]; - pGamxyz = shellf[InList * n + 42]; - 
pGamxzz = shellf[InList * n + 43]; - pGamyxx = shellf[InList * n + 44]; - pGamyxy = shellf[InList * n + 45]; - pGamyxz = shellf[InList * n + 46]; - pGamyyy = shellf[InList * n + 47]; - pGamyyz = shellf[InList * n + 48]; - pGamyzz = shellf[InList * n + 49]; - pGamzxx = shellf[InList * n + 50]; - pGamzxy = shellf[InList * n + 51]; - pGamzxz = shellf[InList * n + 52]; - pGamzyy = shellf[InList * n + 53]; - pGamzyz = shellf[InList * n + 54]; - pGamzzz = shellf[InList * n + 55]; - pRxx = shellf[InList * n + 56]; - pRxy = shellf[InList * n + 57]; - pRxz = shellf[InList * n + 58]; - pRyy = shellf[InList * n + 59]; - pRyz = shellf[InList * n + 60]; - pRzz = shellf[InList * n + 61]; - switch (lp) - { - case 0: //+++ (theta, phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - break; - case 1: //++- (pi-theta, phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = sin(pm * (j + 0.5) * dphi); - pz = -pz; - pgxz = -pgxz; - pgyz = -pgyz; - pAxz = -pAxz; - pAyz = -pAyz; - pchiz = -pchiz; - ptrKz = -ptrKz; - pAxxz = -pAxxz; - pAxyz = -pAxyz; - pAxzx = -pAxzx; - pAxzy = -pAxzy; - pAyyz = -pAyyz; - pAyzx = -pAyzx; - pAyzy = -pAyzy; - pAzzz = -pAzzz; - pGamxxz = -pGamxxz; - pGamxyz = -pGamxyz; - pGamyxz = -pGamyxz; - pGamyyz = -pGamyyz; - pGamzxx = -pGamzxx; - pGamzxy = -pGamzxy; - pGamzyy = -pGamzyy; - pGamzzz = -pGamzzz; - pRxz = -pRxz; - pRyz = -pRyz; - break; - case 2: //+-+ (theta, 2*pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - py = -py; - pgxy = -pgxy; - pgyz = -pgyz; - pAxy = -pAxy; - pAyz = -pAyz; - pchiy = -pchiy; - ptrKy = -ptrKy; - pAxxy = -pAxxy; - pAxyx = -pAxyx; - pAxyz = -pAxyz; - pAxzy = -pAxzy; - pAyyy = -pAyyy; - pAyzx = -pAyzx; - pAyzz = -pAyzz; - pAzzy = -pAzzy; - pGamxxy = -pGamxxy; - pGamxyz = -pGamxyz; - pGamyxx = -pGamyxx; - pGamyxz = -pGamyxz; - pGamyyy = -pGamyyy; - pGamyzz = -pGamyzz; - 
pGamzxy = -pGamzxy; - pGamzyz = -pGamzyz; - pRxy = -pRxy; - pRyz = -pRyz; - break; - case 3: //+-- (pi-theta, 2*pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (j + 0.5) * dphi); - sinmphi = -sin(pm * (j + 0.5) * dphi); - py = -py; - pz = -pz; - pgxy = -pgxy; - pgxz = -pgxz; - pAxy = -pAxy; - pAxz = -pAxz; - pchiy = -pchiy; - pchiz = -pchiz; - ptrKy = -ptrKy; - ptrKz = -ptrKz; - pAxxy = -pAxxy; - pAxxz = -pAxxz; - pAxyx = -pAxyx; - pAxzx = -pAxzx; - pAyyy = -pAyyy; - pAyyz = -pAyyz; - pAyzy = -pAyzy; - pAyzz = -pAyzz; - pAzzy = -pAzzy; - pAzzz = -pAzzz; - pGamxxy = -pGamxxy; - pGamxxz = -pGamxxz; - pGamyxx = -pGamyxx; - pGamyyy = -pGamyyy; - pGamyyz = -pGamyyz; - pGamyzz = -pGamyzz; - pGamzxx = -pGamzxx; - pGamzyy = -pGamzyy; - pGamzyz = -pGamzyz; - pGamzzz = -pGamzzz; - pRxy = -pRxy; - pRxz = -pRxz; - break; - case 4: //-++ (theta, pi-phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - px = -px; - pgxy = -pgxy; - pgxz = -pgxz; - pAxy = -pAxy; - pAxz = -pAxz; - pchix = -pchix; - ptrKx = -ptrKx; - pAxxx = -pAxxx; - pAxyy = -pAxyy; - pAxyz = -pAxyz; - pAxzy = -pAxzy; - pAxzz = -pAxzz; - pAyyx = -pAyyx; - pAyzx = -pAyzx; - pAzzx = -pAzzx; - pGamxxx = -pGamxxx; - pGamxyy = -pGamxyy; - pGamxyz = -pGamxyz; - pGamxzz = -pGamxzz; - pGamyxy = -pGamyxy; - pGamyxz = -pGamyxz; - pGamzxy = -pGamzxy; - pGamzxz = -pGamzxz; - pRxy = -pRxy; - pRxz = -pRxz; - break; - case 5: //-+- (pi-theta, pi-phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); - px = -px; - pz = -pz; - pgxy = -pgxy; - pgyz = -pgyz; - pAxy = -pAxy; - pAyz = -pAyz; - pchix = -pchix; - pchiz = -pchiz; - ptrKx = -ptrKx; - ptrKz = -ptrKz; - pAxxx = -pAxxx; - pAxxz = -pAxxz; - pAxyy = -pAxyy; - pAxzx = -pAxzx; - pAxzz = -pAxzz; - pAyyx = -pAyyx; - pAyyz = -pAyyz; - pAyzy = -pAyzy; - pAzzx = -pAzzx; - pAzzz = -pAzzz; - pGamxxx = -pGamxxx; - pGamxxz = -pGamxxz; 
- pGamxyy = -pGamxyy; - pGamxzz = -pGamxzz; - pGamyxy = -pGamyxy; - pGamyyz = -pGamyyz; - pGamzxx = -pGamzxx; - pGamzxz = -pGamzxz; - pGamzyy = -pGamzyy; - pGamzzz = -pGamzzz; - pRxy = -pRxy; - pRyz = -pRyz; - break; - case 6: //--+ (theta, pi+phi) - costheta = arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - px = -px; - py = -py; - pgxz = -pgxz; - pgyz = -pgyz; - pAxz = -pAxz; - pAyz = -pAyz; - pchix = -pchix; - pchiy = -pchiy; - ptrKx = -ptrKx; - ptrKy = -ptrKy; - pAxxx = -pAxxx; - pAxxy = -pAxxy; - pAxyx = -pAxyx; - pAxyy = -pAxyy; - pAxzz = -pAxzz; - pAyyx = -pAyyx; - pAyyy = -pAyyy; - pAyzz = -pAyzz; - pAzzx = -pAzzx; - pAzzy = -pAzzy; - pGamxxx = -pGamxxx; - pGamxxy = -pGamxxy; - pGamxyy = -pGamxyy; - pGamxzz = -pGamxzz; - pGamyxx = -pGamyxx; - pGamyxy = -pGamyxy; - pGamyyy = -pGamyyy; - pGamyzz = -pGamyzz; - pGamzxz = -pGamzxz; - pGamzyz = -pGamzyz; - pRxz = -pRxz; - pRyz = -pRyz; - break; - case 7: //--- (pi-theta, pi+phi) - costheta = -arcostheta[i]; - cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); - sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); - px = -px; - py = -py; - pz = -pz; - pchix = -pchix; - pchiy = -pchiy; - pchiz = -pchiz; - ptrKx = -ptrKx; - ptrKy = -ptrKy; - ptrKz = -ptrKz; - pAxxx = -pAxxx; - pAxxy = -pAxxy; - pAxxz = -pAxxz; - pAxyx = -pAxyx; - pAxyy = -pAxyy; - pAxyz = -pAxyz; - pAxzx = -pAxzx; - pAxzy = -pAxzy; - pAxzz = -pAxzz; - pAyyx = -pAyyx; - pAyyy = -pAyyy; - pAyyz = -pAyyz; - pAyzx = -pAyzx; - pAyzy = -pAyzy; - pAyzz = -pAyzz; - pAzzx = -pAzzx; - pAzzy = -pAzzy; - pAzzz = -pAzzz; - pGamxxx = -pGamxxx; - pGamxxy = -pGamxxy; - pGamxxz = -pGamxxz; - pGamxyy = -pGamxyy; - pGamxyz = -pGamxyz; - pGamxzz = -pGamxzz; - pGamyxx = -pGamyxx; - pGamyxy = -pGamyxy; - pGamyxz = -pGamyxz; - pGamyyy = -pGamyyy; - pGamyyz = -pGamyyz; - pGamyzz = -pGamyzz; - pGamzxx = -pGamzxx; - pGamzxy = -pGamzxy; - pGamzxz = -pGamzxz; - pGamzyy = -pGamzyy; - pGamzyz = -pGamzyz; - pGamzzz = -pGamzzz; - } - 
- f_getnp4_point(px, py, pz, pchi, ptrK, - pgxx, pgxy, pgxz, pgyy, pgyz, pgzz, - pAxx, pAxy, pAxz, pAyy, pAyz, pAzz, - pchix, pchiy, pchiz, - ptrKx, ptrKy, ptrKz, - pAxxx, pAxxy, pAxxz, - pAxyx, pAxyy, pAxyz, - pAxzx, pAxzy, pAxzz, - pAyyx, pAyyy, pAyyz, - pAyzx, pAyzy, pAyzz, - pAzzx, pAzzy, pAzzz, - pGamxxx, pGamxxy, pGamxxz, pGamxyy, pGamxyz, pGamxzz, - pGamyxx, pGamyxy, pGamyxz, pGamyyy, pGamyyz, pGamyzz, - pGamzxx, pGamzxy, pGamzxz, pGamzyy, pGamzyz, pGamzzz, - pRxx, pRxy, pRxz, pRyy, pRyz, pRzz, - psi4RR, psi4II); - - thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 - - // find back the one - pchi = pchi + 1; -#ifdef GaussInt - // wtcostheta is even function respect costheta - RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; - IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; -#else - RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi); - IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi); -#endif - } - countlm++; // no sanity check for countlm and NN which should be noted in the input parameters - } - } - - for (int ii = 0; ii < NN; ii++) - { -#ifdef GaussInt - RP_out[ii] = RP_out[ii] * rex * dphi; - IP_out[ii] = IP_out[ii] * rex * dphi; -#else - RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; - IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; -#endif - } - //|------+ Communicate and sum the results from each processor. 
- - { - double *RPIP_out = new double[2 * NN]; - double *RPIP = new double[2 * NN]; - memcpy(RPIP_out, RP_out, NN * sizeof(double)); - memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); - MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - memcpy(RP, RPIP, NN * sizeof(double)); - memcpy(IP, RPIP + NN, NN * sizeof(double)); - delete[] RPIP_out; - delete[] RPIP; - } - - //|------= Free memory. - - delete[] pox[0]; - delete[] pox[1]; - delete[] pox[2]; - delete[] shellf; - delete[] RP_out; - delete[] IP_out; - DG_List->clearList(); -} -//|---------------------------------------------------------------- -// do not discriminate box and shell -//|---------------------------------------------------------------- -bool surface_integral::SR_Interp_Points(MyList *VarList, cgh *GH, ShellPatch *SH, - int NN, double **XX, double *Shellf) -{ - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - double pox[3]; - for (int i = 0; i < NN; i++) - { - for (int j = 0; j < 3; j++) - pox[j] = XX[j][i]; - int lev = GH->levels - 1; - bool notfound = true; - - while (notfound) - { - if (lev < 0) - { - if (SH) - { - if (SH->Interp_One_Point(VarList, pox, Shellf + i * num_var, Symmetry)) - { - return true; - } - if (myrank == 0) - cout << "surface_integral::SR_Interp_Points point (" << pox[0] << "," << pox[1] << "," << pox[2] << ") is out of cgh and shell domain!" << endl; - } - else - { - if (myrank == 0) - cout << "surface_integral::SR_Interp_Points: point (" << pox[0] << "," << pox[1] << "," << pox[2] << ") is out of cgh domain!" 
<< endl; - } - return false; - } - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - if (Pp->data->Interp_ONE_Point(VarList, pox, Shellf + i * num_var, Symmetry)) - { - notfound = false; - break; - } - Pp = Pp->next; - } - lev--; - } - } - return true; -} + +//---------------------------------------------------------------- +// Using Gauss-Legendre quadrature in theta direction +// and trapezoidal rule in phi direction (from Second Euler-Maclaurin summation formula, we can see that +// this method gives expolential convergence for periodic function) +//---------------------------------------------------------------- +#ifdef newc +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; +#else +#include +#include +#include +#include +#include +#include +#endif +#include + +#include "misc.h" +#include "cgh.h" +#include "Parallel.h" +#include "surface_integral.h" +#include "fadmquantites_bssn.h" +#include "getnpem2.h" +#include "getnp4.h" +#include "parameters.h" + +#define PI M_PI +//|============================================================================ +//| Constructor +//|============================================================================ + +surface_integral::surface_integral(int iSymmetry) : Symmetry(iSymmetry) +{ + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &cpusize); + int N = 40; + // read parameter from file + { + const int LEN = 256; + char pline[LEN]; + string str, sgrp, skey, sval; + int sind; + char pname[50]; + { + map::iterator iter = parameters::str_par.find("inputpar"); + if (iter != parameters::str_par.end()) + { + strcpy(pname, (iter->second).c_str()); + } + else + { + cout << "Error inputpar" << endl; + exit(0); + } + } + ifstream inf(pname, ifstream::in); + if (!inf.good() && myrank == 0) + { + cout << "Can not open parameter file " << pname << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + for (int i = 1; inf.good(); i++) + { + inf.getline(pline, LEN); + str = 
pline; + + int status = misc::parse_parts(str, sgrp, skey, sval, sind); + if (status == -1) + { + cout << "error reading parameter file " << pname << " in line " << i << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + else if (status == 0) + continue; + + if (sgrp == "SurfaceIntegral") + { + if (skey == "number of points for quarter sphere") + N = atoi(sval.c_str()); + } + } + inf.close(); + } + //|-----number of points for whole [0,pi] x [0,2pi] + N_phi = 4 * N; // for simplicity, we require this number must be 4*N + N_theta = 2 * N; // 2*N + + if (myrank == 0) + { + cout << "-----------------------------------------------------------------------" << endl; +#ifdef GaussInt + cout << " spherical integration for wave form extraction with Gauss method " << endl; +#else + cout << " spherical integration for wave form extraction with mid point method " << endl; +#endif + cout << " N_phi = " << N_phi << endl; + cout << " N_theta = " << N_theta << endl; + cout << "-----------------------------------------------------------------------" << endl; + } + +#ifdef GaussInt + // weight function cover all of [0,pi] + arcostheta = new double[N_theta]; + wtcostheta = new double[N_theta]; + + // note: theta in [0,pi/2], upper half sphere, corresponds to 1 < costheta < 0 + misc::gaulegf(-1.0, 1.0, arcostheta, wtcostheta, N_theta); + // due to symmetry, I need first half array corresponds to upper sphere, note these two arrays must match each other + misc::inversearray(arcostheta, N_theta); + misc::inversearray(wtcostheta, N_theta); +#endif + + if (Symmetry == 2) + { + N_phi = N_phi / 4; + N_theta = N_theta / 2; + dphi = PI / (2.0 * N_phi); + dcostheta = 1.0 / N_theta; + factor = 8; + } + else if (Symmetry == 1) + { + N_theta = N_theta / 2; + dphi = 2.0 * PI / N_phi; + dcostheta = 1.0 / N_theta; + factor = 2; + } + else if (Symmetry == 0) + { + dphi = 2.0 * PI / N_phi; + dcostheta = 2.0 / N_theta; + factor = 1; + } + else if (myrank == 0) + { + cout << 
"surface_integral::surface_integral: not supported Symmetry setting!" << endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + +#ifndef GaussInt + // weight function cover all of [0,pi] + arcostheta = new double[N_theta]; +#endif + n_tot = N_theta * N_phi; + nx_g = new double[n_tot]; + ny_g = new double[n_tot]; + nz_g = new double[n_tot]; + + int n = 0; + double costheta, sintheta, ph; + + for (int i = 0; i < N_theta; ++i) + { +#ifndef GaussInt + arcostheta[i] = 1.0 - (i + 0.5) * dcostheta; +#endif + costheta = arcostheta[i]; + sintheta = sqrt(1.0 - costheta * costheta); + + for (int j = 0; j < N_phi; ++j) + { + ph = (j + 0.5) * dphi; + // normal vector respect to the constant R sphere + nx_g[n] = sintheta * cos(ph); + ny_g[n] = sintheta * sin(ph); + nz_g[n] = costheta; + n++; + } + } +} + +//|============================================================================ +//| Destructor +//|============================================================================ +surface_integral::~surface_integral() +{ + delete[] nx_g; + delete[] ny_g; + delete[] nz_g; + delete[] arcostheta; +#ifdef GaussInt + delete[] wtcostheta; +#endif +} +//|---------------------------------------------------------------- +// spin weighted spinw component of psi4, general routine +// l takes from spinw to maxl; m takes from -l to l +//|---------------------------------------------------------------- +void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4, + int spinw, int maxl, int NN, double *RP, double *IP, + monitor *Monitor) // NN is the length of RP and IP +{ + if (myrank == 0 && GH->grids[lev] != 1) + if (Monitor->outfile) + Monitor->outfile << "WARNING: surface integral on multipatches" << endl; + else + cout << "WARNING: surface integral on multipatches" << endl; + + const int InList = 2; + + MyList *DG_List = new MyList(Rpsi4); + DG_List->insert(Ipsi4); + + int n; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < 
n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex * nz_g[n]; + } + + int mp, Lp, Nmin, Nmax; + mp = n_tot / cpusize; + Lp = n_tot - cpusize * mp; + if (Lp > myrank) + { + Nmin = myrank * mp + myrank; + Nmax = Nmin + mp; + } + else + { + Nmin = myrank * mp + Lp; + Nmax = Nmin + mp - 1; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax); + double *RP_out, *IP_out; + RP_out = new double[NN]; + IP_out = new double[NN]; + + for (int ii = 0; ii < NN; ii++) + { + RP_out[ii] = 0; + IP_out[ii] = 0; + } + // theta part + double costheta, thetap; + double cosmphi, sinmphi; + + int i, j; + int lpsy = 0; + if (Symmetry == 0) + lpsy = 1; + else if (Symmetry == 1) + lpsy = 2; + else if (Symmetry == 2) + lpsy = 8; + + double psi4RR, psi4II; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + int countlm = 0; + for (int pl = spinw; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + { + for (int lp = 0; lp < lpsy; lp++) + { + switch (lp) + { + case 0: //+++ (theta, phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + psi4RR = shellf[InList * n]; + psi4II = shellf[InList * n + 1]; + break; + case 1: //++- (pi-theta, phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + psi4RR = Rpsi4->SoA[2] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * shellf[InList * n + 1]; + break; + case 2: //+-+ (theta, 2*pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + psi4RR = Rpsi4->SoA[1] * shellf[InList * n]; + psi4II = Ipsi4->SoA[1] * shellf[InList * n + 1]; + break; + case 3: //+-- (pi-theta, 2*pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * 
dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * shellf[InList * n + 1]; + break; + case 4: //-++ (theta, pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[0] * shellf[InList * n + 1]; + break; + case 5: //-+- (pi-theta, pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[0] * shellf[InList * n + 1]; + break; + case 6: //--+ (theta, pi+phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; + break; + case 7: //--- (pi-theta, pi+phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; + } + + thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 +#ifdef GaussInt + // wtcostheta is even function respect costheta + RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; + IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; +#else + RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); + IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi); +#endif + } + countlm++; // no sanity check for countlm 
and NN which should be noted in the input parameters + } + } + + for (int ii = 0; ii < NN; ii++) + { +#ifdef GaussInt + RP_out[ii] = RP_out[ii] * rex * dphi; + IP_out[ii] = IP_out[ii] * rex * dphi; +#else + RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; + IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; +#endif + } + //|------+ Communicate and sum the results from each processor. + + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } + + //|------= Free memory. + + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + delete[] RP_out; + delete[] IP_out; + DG_List->clearList(); +} +void surface_integral::surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4, + int spinw, int maxl, int NN, double *RP, double *IP, + monitor *Monitor, MPI_Comm Comm_here) // NN is the length of RP and IP +{ + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start surface_integral::surf_Wave"); + + int lmyrank; + MPI_Comm_rank(Comm_here, &lmyrank); + if (lmyrank == 0 && GH->grids[lev] != 1) + if (Monitor->outfile) + Monitor->outfile << "WARNING: surface integral on multipatches" << endl; + else + cout << "WARNING: surface integral on multipatches" << endl; + + const int InList = 2; + + MyList *DG_List = new MyList(Rpsi4); + DG_List->insert(Ipsi4); + + int n; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex * nz_g[n]; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Interp_Points"); + + 
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Comm_here); + + // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Interp_Points"); + + int mp, Lp, Nmin, Nmax; + + int cpusize_here; + MPI_Comm_size(Comm_here, &cpusize_here); + + mp = n_tot / cpusize_here; + Lp = n_tot - cpusize_here * mp; + + if (Lp > lmyrank) + { + Nmin = lmyrank * mp + lmyrank; + Nmax = Nmin + mp; + } + else + { + Nmin = lmyrank * mp + Lp; + Nmax = Nmin + mp - 1; + } + + //|~~~~~> Integrate the dot product of Dphi with the surface normal. + + double *RP_out, *IP_out; + RP_out = new double[NN]; + IP_out = new double[NN]; + + for (int ii = 0; ii < NN; ii++) + { + RP_out[ii] = 0; + IP_out[ii] = 0; + } + // theta part + double costheta, thetap; + double cosmphi, sinmphi; + + int i, j; + int lpsy = 0; + if (Symmetry == 0) + lpsy = 1; + else if (Symmetry == 1) + lpsy = 2; + else if (Symmetry == 2) + lpsy = 8; + + double psi4RR, psi4II; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + int countlm = 0; + for (int pl = spinw; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + { + for (int lp = 0; lp < lpsy; lp++) + { + switch (lp) + { + case 0: //+++ (theta, phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + psi4RR = shellf[InList * n]; + psi4II = shellf[InList * n + 1]; + break; + case 1: //++- (pi-theta, phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + psi4RR = Rpsi4->SoA[2] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * shellf[InList * n + 1]; + break; + case 2: //+-+ (theta, 2*pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + psi4RR = Rpsi4->SoA[1] * shellf[InList * n]; + psi4II = Ipsi4->SoA[1] * shellf[InList * n + 1]; + break; + case 3: //+-- 
(pi-theta, 2*pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * shellf[InList * n + 1]; + break; + case 4: //-++ (theta, pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[0] * shellf[InList * n + 1]; + break; + case 5: //-+- (pi-theta, pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[0] * shellf[InList * n + 1]; + break; + case 6: //--+ (theta, pi+phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; + break; + case 7: //--- (pi-theta, pi+phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; + } + + thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 +#ifdef GaussInt + // wtcostheta is even function respect costheta + RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; + IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; +#else + RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); + IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * 
cosmphi - psi4RR * sinmphi); +#endif + } + countlm++; // no sanity check for countlm and NN which should be noted in the input parameters + } + } + + for (int ii = 0; ii < NN; ii++) + { +#ifdef GaussInt + RP_out[ii] = RP_out[ii] * rex * dphi; + IP_out[ii] = IP_out[ii] * rex * dphi; +#else + RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; + IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; +#endif + } + //|------+ Communicate and sum the results from each processor. + + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, Comm_here); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } + + //|------= Free memory. + + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + delete[] RP_out; + delete[] IP_out; + DG_List->clearList(); +} +//|---------------------------------------------------------------- +// for shell patch +//|---------------------------------------------------------------- +void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, var *Rpsi4, var *Ipsi4, + int spinw, int maxl, int NN, double *RP, double *IP, + monitor *Monitor) // NN is the length of RP and IP +{ + const int InList = 2; + + MyList *DG_List = new MyList(Rpsi4); + DG_List->insert(Ipsi4); + + int n; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex * nz_g[n]; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); + + int mp, Lp, Nmin, Nmax; + + mp = n_tot / cpusize; + Lp = n_tot - cpusize * mp; + + if (Lp > myrank) + { + Nmin = myrank * mp + myrank; + Nmax = Nmin + mp; + } + else + { + 
Nmin = myrank * mp + Lp; + Nmax = Nmin + mp - 1; + } + + //|~~~~~> Integrate the dot product of Dphi with the surface normal. + + double *RP_out, *IP_out; + RP_out = new double[NN]; + IP_out = new double[NN]; + + for (int ii = 0; ii < NN; ii++) + { + RP_out[ii] = 0; + IP_out[ii] = 0; + } + // theta part + double costheta, thetap; + double cosmphi, sinmphi; + + int i, j; + int lpsy = 0; + if (Symmetry == 0) + lpsy = 1; + else if (Symmetry == 1) + lpsy = 2; + else if (Symmetry == 2) + lpsy = 8; + + double psi4RR, psi4II; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + int countlm = 0; + for (int pl = spinw; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + { + for (int lp = 0; lp < lpsy; lp++) + { + switch (lp) + { + case 0: //+++ (theta, phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + psi4RR = shellf[InList * n]; + psi4II = shellf[InList * n + 1]; + break; + case 1: //++- (pi-theta, phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + psi4RR = Rpsi4->SoA[2] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * shellf[InList * n + 1]; + break; + case 2: //+-+ (theta, 2*pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + psi4RR = Rpsi4->SoA[1] * shellf[InList * n]; + psi4II = Ipsi4->SoA[1] * shellf[InList * n + 1]; + break; + case 3: //+-- (pi-theta, 2*pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * shellf[InList * n + 1]; + break; + case 4: //-++ (theta, pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + psi4RR = 
Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[0] * shellf[InList * n + 1]; + break; + case 5: //-+- (pi-theta, pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[0] * shellf[InList * n + 1]; + break; + case 6: //--+ (theta, pi+phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; + break; + case 7: //--- (pi-theta, pi+phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + psi4RR = Rpsi4->SoA[2] * Rpsi4->SoA[1] * Rpsi4->SoA[0] * shellf[InList * n]; + psi4II = Ipsi4->SoA[2] * Ipsi4->SoA[1] * Ipsi4->SoA[0] * shellf[InList * n + 1]; + } + + thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 +#ifdef GaussInt + // wtcostheta is even function respect costheta + RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; + IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; +#else + RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); + IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi); +#endif + } + countlm++; // no sanity check for countlm and NN which should be noted in the input parameters + } + } + + for (int ii = 0; ii < NN; ii++) + { +#ifdef GaussInt + RP_out[ii] = RP_out[ii] * rex * dphi; + IP_out[ii] = IP_out[ii] * rex * dphi; +#else + RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; + IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; +#endif + } + //|------+ Communicate and 
sum the results from each processor. + + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } + + //|------= Free memory. + + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + delete[] RP_out; + delete[] IP_out; + DG_List->clearList(); +} +//|---------------------------------------------------------------- +// for shell patch +// for EM wave specially symmetric case +//|---------------------------------------------------------------- +void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, + var *Ex, var *Ey, var *Ez, var *Bx, var *By, var *Bz, + var *chi, var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, + int spinw, int maxl, int NN, double *RP, double *IP, + monitor *Monitor) // NN is the length of RP and IP +{ + const int InList = 13; + + MyList *DG_List = new MyList(Ex); + DG_List->insert(Ey); + DG_List->insert(Ez); + DG_List->insert(Bx); + DG_List->insert(By); + DG_List->insert(Bz); + DG_List->insert(chi); + DG_List->insert(gxx); + DG_List->insert(gxy); + DG_List->insert(gxz); + DG_List->insert(gyy); + DG_List->insert(gyz); + DG_List->insert(gzz); + + int n; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex * nz_g[n]; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); + + int mp, Lp, Nmin, Nmax; + + mp = n_tot / cpusize; + Lp = n_tot - cpusize * mp; + + if (Lp > myrank) + { + Nmin = myrank * mp + myrank; + Nmax = Nmin + mp; + } + else + { + Nmin = myrank * mp + Lp; + Nmax 
= Nmin + mp - 1; + } + + //|~~~~~> Integrate the dot product of Dphi with the surface normal. + + double *RP_out, *IP_out; + RP_out = new double[NN]; + IP_out = new double[NN]; + + for (int ii = 0; ii < NN; ii++) + { + RP_out[ii] = 0; + IP_out[ii] = 0; + } + // theta part + double costheta, thetap; + double cosmphi, sinmphi; + + int i, j; + int lpsy = 0; + if (Symmetry == 0) + lpsy = 1; + else if (Symmetry == 1) + lpsy = 2; + else if (Symmetry == 2) + lpsy = 8; + + double psi4RR, psi4II; + double px, py, pz; + double pEx, pEy, pEz, pBx, pBy, pBz; + double pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + int countlm = 0; + for (int pl = spinw; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + { + for (int lp = 0; lp < lpsy; lp++) + { + px = pox[0][n]; + py = pox[1][n]; + pz = pox[2][n]; + pEx = shellf[InList * n]; + pEy = shellf[InList * n + 1]; + pEz = shellf[InList * n + 2]; + pBx = shellf[InList * n + 3]; + pBy = shellf[InList * n + 4]; + pBz = shellf[InList * n + 5]; + pchi = shellf[InList * n + 6]; + pgxx = shellf[InList * n + 7]; + pgxy = shellf[InList * n + 8]; + pgxz = shellf[InList * n + 9]; + pgyy = shellf[InList * n + 10]; + pgyz = shellf[InList * n + 11]; + pgzz = shellf[InList * n + 12]; + switch (lp) + { + case 0: //+++ (theta, phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + break; + case 1: //++- (pi-theta, phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + pz = -pz; + pEz = -pEz; + pBx = -pBx; + pBy = -pBy; + pgxz = -pgxz; + pgyz = -pgyz; + break; + case 2: //+-+ (theta, 2*pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + py = -py; + pEy = -pEy; + pBx = -pBx; + pBz = -pBz; + pgxy = -pgxy; + pgyz = 
-pgyz; + break; + case 3: //+-- (pi-theta, 2*pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + py = -py; + pz = -pz; + pEz = -pEz; + pBz = -pBz; + pgxz = -pgxz; + pEy = -pEy; + pBy = -pBy; + pgxy = -pgxy; + break; + case 4: //-++ (theta, pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + px = -px; + pEx = -pEx; + pBy = -pBy; + pBz = -pBz; + pgxy = -pgxy; + pgxz = -pgxz; + break; + case 5: //-+- (pi-theta, pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + pz = -pz; + px = -px; + pEz = -pEz; + pBz = -pBz; + pgyz = -pgyz; + pEx = -pEx; + pBx = -pBx; + pgxy = -pgxy; + break; + case 6: //--+ (theta, pi+phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + px = -px; + py = -py; + pEx = -pEx; + pBx = -pBx; + pgxz = -pgxz; + pEy = -pEy; + pBy = -pBy; + pgyz = -pgyz; + break; + case 7: //--- (pi-theta, pi+phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + px = -px; + py = -py; + pz = -pz; + pEx = -pEx; + pEy = -pEy; + pEz = -pEz; + } + + f_getnpem2_point(px, py, pz, pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz, pEx, pEy, pEz, pBx, pBy, pBz, + psi4RR, psi4II); + thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 + + // find back the one + pchi = pchi + 1; +#ifdef GaussInt + // wtcostheta is even function respect costheta + RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; + IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; +#else + RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR 
* cosmphi + psi4II * sinmphi); + IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi); +#endif + } + countlm++; // no sanity check for countlm and NN which should be noted in the input parameters + } + } + + for (int ii = 0; ii < NN; ii++) + { +#ifdef GaussInt + RP_out[ii] = RP_out[ii] * rex * dphi; + IP_out[ii] = IP_out[ii] * rex * dphi; +#else + RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; + IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; +#endif + } + //|------+ Communicate and sum the results from each processor. + + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } + + //|------= Free memory. + + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + delete[] RP_out; + delete[] IP_out; + DG_List->clearList(); +} +//|---------------------------------------------------------------- +// for shell patch +// for EM wave specially symmetric case +// unify for phi1 and phi2 +//|---------------------------------------------------------------- +void surface_integral::surf_Wave(double rex, int lev, ShellPatch *GH, + var *Ex, var *Ey, var *Ez, var *Bx, var *By, var *Bz, + var *chi, var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, + int spinw, int maxl, int NN, double *RP, double *IP, + monitor *Monitor, + void (*funcs)(double &, double &, double &, + double &, double &, double &, double &, double &, double &, double &, + double &, double &, double &, double &, double &, double &, + double &, double &)) // NN is the length of RP and IP +{ + const int InList = 13; + + MyList *DG_List = new MyList(Ex); + DG_List->insert(Ey); + DG_List->insert(Ez); + 
DG_List->insert(Bx); + DG_List->insert(By); + DG_List->insert(Bz); + DG_List->insert(chi); + DG_List->insert(gxx); + DG_List->insert(gxy); + DG_List->insert(gxz); + DG_List->insert(gyy); + DG_List->insert(gyz); + DG_List->insert(gzz); + + int n; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex * nz_g[n]; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); + + double *RP_out, *IP_out; + RP_out = new double[NN]; + IP_out = new double[NN]; + + for (int ii = 0; ii < NN; ii++) + { + RP_out[ii] = 0; + IP_out[ii] = 0; + } + +#if 0 +// for debug + if(myrank==0) + { + double costheta, thetap; + double cosmphi,sinmphi; + + int i,j; + int lpsy=0; + if( Symmetry == 0 ) lpsy=1; + else if( Symmetry == 1 ) lpsy=2; + else if( Symmetry == 2 ) lpsy=8; + + double psi4RR,psi4II; + double px,py,pz; + double pEx,pEy,pEz,pBx,pBy,pBz; + double pchi,pgxx,pgxy,pgxz,pgyy,pgyz,pgzz; + for( n = 0; n <= n_tot-1; n++) + { +// need round off always + i = int(n/N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + for(int lp=0;lp myrank) + { + Nmin = myrank * mp + myrank; + Nmax = Nmin + mp; + } + else + { + Nmin = myrank * mp + Lp; + Nmax = Nmin + mp - 1; + } + + // theta part + double costheta, thetap; + double cosmphi, sinmphi; + + int i, j; + int lpsy = 0; + if (Symmetry == 0) + lpsy = 1; + else if (Symmetry == 1) + lpsy = 2; + else if (Symmetry == 2) + lpsy = 8; + + double psi4RR, psi4II; + double px, py, pz; + double pEx, pEy, pEz, pBx, pBy, pBz; + double pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + int countlm = 0; + for (int pl = spinw; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + { + for (int lp = 0; lp < 
lpsy; lp++) + { + px = pox[0][n]; + py = pox[1][n]; + pz = pox[2][n]; + pEx = shellf[InList * n]; + pEy = shellf[InList * n + 1]; + pEz = shellf[InList * n + 2]; + pBx = shellf[InList * n + 3]; + pBy = shellf[InList * n + 4]; + pBz = shellf[InList * n + 5]; + pchi = shellf[InList * n + 6]; + pgxx = shellf[InList * n + 7]; + pgxy = shellf[InList * n + 8]; + pgxz = shellf[InList * n + 9]; + pgyy = shellf[InList * n + 10]; + pgyz = shellf[InList * n + 11]; + pgzz = shellf[InList * n + 12]; + switch (lp) + { + case 0: //+++ (theta, phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + break; + case 1: //++- (pi-theta, phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + pz = -pz; + pEz = -pEz; + pBx = -pBx; + pBy = -pBy; + pgxz = -pgxz; + pgyz = -pgyz; + break; + case 2: //+-+ (theta, 2*pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + py = -py; + pEy = -pEy; + pBx = -pBx; + pBz = -pBz; + pgxy = -pgxy; + pgyz = -pgyz; + break; + case 3: //+-- (pi-theta, 2*pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + py = -py; + pz = -pz; + pEz = -pEz; + pBz = -pBz; + pgxz = -pgxz; + pEy = -pEy; + pBy = -pBy; + pgxy = -pgxy; + break; + case 4: //-++ (theta, pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + px = -px; + pEx = -pEx; + pBy = -pBy; + pBz = -pBz; + pgxy = -pgxy; + pgxz = -pgxz; + break; + case 5: //-+- (pi-theta, pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + pz = -pz; + px = -px; + pEz = -pEz; + pBz = -pBz; + pgyz = -pgyz; + pEx = -pEx; + pBx = -pBx; + pgxy = -pgxy; + break; + case 6: //--+ (theta, pi+phi) + costheta = arcostheta[i]; + cosmphi = 
cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + px = -px; + py = -py; + pEx = -pEx; + pBx = -pBx; + pgxz = -pgxz; + pEy = -pEy; + pBy = -pBy; + pgyz = -pgyz; + break; + case 7: //--- (pi-theta, pi+phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + px = -px; + py = -py; + pz = -pz; + pEx = -pEx; + pEy = -pEy; + pEz = -pEz; + } + + funcs(px, py, pz, pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz, pEx, pEy, pEz, pBx, pBy, pBz, + psi4RR, psi4II); + thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 + + // find back the one + pchi = pchi + 1; +#ifdef GaussInt + // wtcostheta is even function respect costheta + RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; + IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; +#else + RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi); + IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi); +#endif + } + countlm++; // no sanity check for countlm and NN which should be noted in the input parameters + } + } +#endif + + for (int ii = 0; ii < NN; ii++) + { +#ifdef GaussInt + RP_out[ii] = RP_out[ii] * rex * dphi; + IP_out[ii] = IP_out[ii] * rex * dphi; +#else + RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; + IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; +#endif + } + //|------+ Communicate and sum the results from each processor. 
+ + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } + + //|------= Free memory. + + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + delete[] RP_out; + delete[] IP_out; + DG_List->clearList(); +} +//|---------------------------------------------------------------- +// for box +// for EM wave specially symmetric case +// unify for phi1 and phi2 +//|---------------------------------------------------------------- +void surface_integral::surf_Wave(double rex, int lev, cgh *GH, + var *Ex, var *Ey, var *Ez, var *Bx, var *By, var *Bz, + var *chi, var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, + int spinw, int maxl, int NN, double *RP, double *IP, + monitor *Monitor, + void (*funcs)(double &, double &, double &, + double &, double &, double &, double &, double &, double &, double &, + double &, double &, double &, double &, double &, double &, + double &, double &)) // NN is the length of RP and IP +{ + const int InList = 13; + + MyList *DG_List = new MyList(Ex); + DG_List->insert(Ey); + DG_List->insert(Ez); + DG_List->insert(Bx); + DG_List->insert(By); + DG_List->insert(Bz); + DG_List->insert(chi); + DG_List->insert(gxx); + DG_List->insert(gxy); + DG_List->insert(gxz); + DG_List->insert(gyy); + DG_List->insert(gyz); + DG_List->insert(gzz); + + int n; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex * nz_g[n]; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); + + double 
*RP_out, *IP_out; + RP_out = new double[NN]; + IP_out = new double[NN]; + + for (int ii = 0; ii < NN; ii++) + { + RP_out[ii] = 0; + IP_out[ii] = 0; + } + +#if 0 +// for debug + if(myrank==0) + { + double costheta, thetap; + double cosmphi,sinmphi; + + int i,j; + int lpsy=0; + if( Symmetry == 0 ) lpsy=1; + else if( Symmetry == 1 ) lpsy=2; + else if( Symmetry == 2 ) lpsy=8; + + double psi4RR,psi4II; + double px,py,pz; + double pEx,pEy,pEz,pBx,pBy,pBz; + double pchi,pgxx,pgxy,pgxz,pgyy,pgyz,pgzz; + for( n = 0; n <= n_tot-1; n++) + { +// need round off always + i = int(n/N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + for(int lp=0;lp myrank) + { + Nmin = myrank * mp + myrank; + Nmax = Nmin + mp; + } + else + { + Nmin = myrank * mp + Lp; + Nmax = Nmin + mp - 1; + } + + // theta part + double costheta, thetap; + double cosmphi, sinmphi; + + int i, j; + int lpsy = 0; + if (Symmetry == 0) + lpsy = 1; + else if (Symmetry == 1) + lpsy = 2; + else if (Symmetry == 2) + lpsy = 8; + + double psi4RR, psi4II; + double px, py, pz; + double pEx, pEy, pEz, pBx, pBy, pBz; + double pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + int countlm = 0; + for (int pl = spinw; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + { + for (int lp = 0; lp < lpsy; lp++) + { + px = pox[0][n]; + py = pox[1][n]; + pz = pox[2][n]; + pEx = shellf[InList * n]; + pEy = shellf[InList * n + 1]; + pEz = shellf[InList * n + 2]; + pBx = shellf[InList * n + 3]; + pBy = shellf[InList * n + 4]; + pBz = shellf[InList * n + 5]; + pchi = shellf[InList * n + 6]; + pgxx = shellf[InList * n + 7]; + pgxy = shellf[InList * n + 8]; + pgxz = shellf[InList * n + 9]; + pgyy = shellf[InList * n + 10]; + pgyz = shellf[InList * n + 11]; + pgzz = shellf[InList * n + 12]; + switch (lp) + { + case 0: //+++ (theta, phi) + costheta = arcostheta[i]; + cosmphi = 
cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + break; + case 1: //++- (pi-theta, phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + pz = -pz; + pEz = -pEz; + pBx = -pBx; + pBy = -pBy; + pgxz = -pgxz; + pgyz = -pgyz; + break; + case 2: //+-+ (theta, 2*pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + py = -py; + pEy = -pEy; + pBx = -pBx; + pBz = -pBz; + pgxy = -pgxy; + pgyz = -pgyz; + break; + case 3: //+-- (pi-theta, 2*pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + py = -py; + pz = -pz; + pEz = -pEz; + pBz = -pBz; + pgxz = -pgxz; + pEy = -pEy; + pBy = -pBy; + pgxy = -pgxy; + break; + case 4: //-++ (theta, pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + px = -px; + pEx = -pEx; + pBy = -pBy; + pBz = -pBz; + pgxy = -pgxy; + pgxz = -pgxz; + break; + case 5: //-+- (pi-theta, pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + pz = -pz; + px = -px; + pEz = -pEz; + pBz = -pBz; + pgyz = -pgyz; + pEx = -pEx; + pBx = -pBx; + pgxy = -pgxy; + break; + case 6: //--+ (theta, pi+phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + px = -px; + py = -py; + pEx = -pEx; + pBx = -pBx; + pgxz = -pgxz; + pEy = -pEy; + pBy = -pBy; + pgyz = -pgyz; + break; + case 7: //--- (pi-theta, pi+phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + px = -px; + py = -py; + pz = -pz; + pEx = -pEx; + pEy = -pEy; + pEz = -pEz; + } + + funcs(px, py, pz, pchi, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz, pEx, pEy, pEz, pBx, pBy, pBz, + psi4RR, psi4II); + thetap = sqrt((2 * pl 
+ 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 + + // find back the one + pchi = pchi + 1; +#ifdef GaussInt + // wtcostheta is even function respect costheta + RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; + IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; +#else + RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi); + IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi); +#endif + } + countlm++; // no sanity check for countlm and NN which should be noted in the input parameters + } + } +#endif + + for (int ii = 0; ii < NN; ii++) + { +#ifdef GaussInt + RP_out[ii] = RP_out[ii] * rex * dphi; + IP_out[ii] = IP_out[ii] * rex * dphi; +#else + RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; + IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; +#endif + } + //|------+ Communicate and sum the results from each processor. + + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } + + //|------= Free memory. 
+ + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + delete[] RP_out; + delete[] IP_out; + DG_List->clearList(); +} +//|---------------------------------------------------------------- +// for null shell patch2 +//|---------------------------------------------------------------- +// rex is x instead of r +void surface_integral::surf_Wave(double rex, int lev, NullShellPatch2 *GH, var *Rpsi4, var *Ipsi4, + int spinw, int maxl, int NN, double *RP, double *IP, + monitor *Monitor) // NN is the length of RP and IP +// spinw 0 for scalar; 1 for electricmagnetic wave; 2 for gravitaitonal wave +// we always assume spinw >= 0 +{ + const int InList = 2; + + MyList *DG_List = new MyList(Rpsi4); + DG_List->insert(Ipsi4); + + int n; + // since we used x instead of r, these global coordinates are fake + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex * nz_g[n]; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + GH->Interp_Points_2D(DG_List, n_tot, pox, shellf, Symmetry); + + int mp, Lp, Nmin, Nmax; + + mp = n_tot / cpusize; + Lp = n_tot - cpusize * mp; + + if (Lp > myrank) + { + Nmin = myrank * mp + myrank; + Nmax = Nmin + mp; + } + else + { + Nmin = myrank * mp + Lp; + Nmax = Nmin + mp - 1; + } + + //|~~~~~> Integrate the dot product of Dphi with the surface normal. 
+ + double *RP_out, *IP_out; + RP_out = new double[NN]; + IP_out = new double[NN]; + + for (int ii = 0; ii < NN; ii++) + { + RP_out[ii] = 0; + IP_out[ii] = 0; + } + // theta part + double costheta, thetap; + double cosmphi, sinmphi; + + int i, j; + int lpsy = 0; + if (Symmetry == 0) + lpsy = 1; + else if (Symmetry == 1) + lpsy = 2; + else if (Symmetry == 2) + lpsy = 8; + + double psi4RR, psi4II; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + int countlm = 0; + for (int pl = spinw; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + { + for (int lp = 0; lp < lpsy; lp++) + { + switch (lp) + { + case 0: //+++ (theta, phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + psi4RR = shellf[InList * n]; + psi4II = shellf[InList * n + 1]; + break; + case 1: //++- (pi-theta, phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + psi4RR = shellf[InList * n]; + psi4II = -shellf[InList * n + 1]; + break; + case 2: //+-+ (theta, 2*pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + psi4RR = shellf[InList * n]; + psi4II = -shellf[InList * n + 1]; + break; + case 3: //+-- (pi-theta, 2*pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + psi4RR = shellf[InList * n]; + psi4II = shellf[InList * n + 1]; + break; + case 4: //-++ (theta, pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + psi4RR = shellf[InList * n]; + psi4II = -shellf[InList * n + 1]; + break; + case 5: //-+- (pi-theta, pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + psi4RR = shellf[InList * n]; + 
psi4II = shellf[InList * n + 1]; + break; + case 6: //--+ (theta, pi+phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + psi4RR = shellf[InList * n]; + psi4II = shellf[InList * n + 1]; + break; + case 7: //--- (pi-theta, pi+phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + psi4RR = shellf[InList * n]; + psi4II = -shellf[InList * n + 1]; + } + + thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 + // based on Eq.(41) of PRD 77, 024027 (2008) +#ifdef GaussInt + // wtcostheta is even function respect costheta + RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; + IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; +#else + RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); // + is because \bar of \bar{Y^s_lm} in Eq.(40) + // of PRD 77, 024027 (2008) + IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi); +#endif + } + countlm++; // no sanity check for countlm and NN which should be noted in the input parameters + } + } + + for (int ii = 0; ii < NN; ii++) + { +// do not need multiply with rex for null shell +#ifdef GaussInt + RP_out[ii] = RP_out[ii] * dphi; + IP_out[ii] = IP_out[ii] * dphi; +#else + RP_out[ii] = RP_out[ii] * dphi * dcostheta; + IP_out[ii] = IP_out[ii] * dphi * dcostheta; +#endif + } + //|------+ Communicate and sum the results from each processor. 
+ + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } + + //|------= Free memory. + + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + delete[] RP_out; + delete[] IP_out; + DG_List->clearList(); +}
//|----------------------------------------------------------------
// for null shell patch
//|----------------------------------------------------------------
// Projects the complex field (Rpsi4 + i*Ipsi4), sampled on the n_tot sphere
// points rex*(nx_g, ny_g, nz_g), onto the basis
//   sqrt((2l+1)/(4*pi)) * Wigner_d(l, m, spinw, cos(theta)) * exp(-i*m*phi)
// for l = spinw..maxl and m = -l..l.
//
// rex     : used as the coordinate scale of the sample points; rex is x instead of r
// lev     : not referenced in this overload
// GH      : null shell patch that interpolates the two fields (Interp_Points_2D)
// Rpsi4   : real part of the field
// Ipsi4   : imaginary part of the field
// spinw   : 0 for scalar; 1 for electromagnetic wave; 2 for gravitational wave
//           (we always assume spinw >= 0)
// maxl    : largest l that is projected
// NN      : length of RP and IP
// RP, IP  : output, real / imaginary mode coefficients summed over all ranks of
//           MPI_COMM_WORLD.  Modes are stored l-major: l = spinw..maxl and, for
//           each l, m = -l..l.
// Monitor : only used to report an undersized NN
//
// If NN is smaller than the number of requested modes, (maxl+1)^2 - spinw^2,
// only the first NN modes are computed and a warning is printed on rank 0.
// Previously this case wrote past the end of the NN-sized work arrays.
void surface_integral::surf_Wave(double rex, int lev, NullShellPatch *GH, var *Rpsi4, var *Ipsi4,
                                 int spinw, int maxl, int NN, double *RP, double *IP,
                                 monitor *Monitor) // NN is the length of RP and IP
{
  const int InList = 2;

  // number of (l,m) pairs visited by the projection loops below
  const int nmodes = (maxl >= spinw) ? (maxl + 1) * (maxl + 1) - spinw * spinw : 0;
  if (NN < nmodes && myrank == 0)
  {
    if (Monitor && Monitor->outfile)
      Monitor->outfile << "WARNING: surf_Wave requested " << nmodes << " (l,m) modes but NN = " << NN
                       << ", modes beyond NN are dropped" << endl;
    else
      cout << "WARNING: surf_Wave requested " << nmodes << " (l,m) modes but NN = " << NN
           << ", modes beyond NN are dropped" << endl;
  }

  // NOTE(review): template arguments look stripped in this copy of the source
  // (presumably MyList<var>) -- confirm against the real file.
  MyList *DG_List = new MyList(Rpsi4);
  DG_List->insert(Ipsi4);

  int n;
  // since we used x instead of r, these global coordinates are fake
  double *pox[3];
  for (int i = 0; i < 3; i++)
    pox[i] = new double[n_tot];
  for (n = 0; n < n_tot; n++)
  {
    pox[0][n] = rex * nx_g[n];
    pox[1][n] = rex * ny_g[n];
    pox[2][n] = rex * nz_g[n];
  }

  // shellf[InList * n + 0/1] = real / imaginary part at sample point n
  double *shellf;
  shellf = new double[n_tot * InList];

  GH->Interp_Points_2D(DG_List, n_tot, pox, shellf, Symmetry);

  // Split the n_tot sample points into contiguous ranges [Nmin, Nmax], one per
  // rank; the first Lp ranks take one extra point.
  int mp, Lp, Nmin, Nmax;

  mp = n_tot / cpusize;
  Lp = n_tot - cpusize * mp;

  if (Lp > myrank)
  {
    Nmin = myrank * mp + myrank;
    Nmax = Nmin + mp;
  }
  else
  {
    Nmin = myrank * mp + Lp;
    Nmax = Nmin + mp - 1;
  }

  //|~~~~~> Accumulate this rank's share of every (l,m) projection.

  double *RP_out, *IP_out;
  RP_out = new double[NN];
  IP_out = new double[NN];

  for (int ii = 0; ii < NN; ii++)
  {
    RP_out[ii] = 0;
    IP_out[ii] = 0;
  }
  // theta part
  double costheta, thetap;
  double cosmphi, sinmphi;

  int i, j;
  // number of mirror images each stored sample stands for
  int lpsy = 0;
  if (Symmetry == 0)
    lpsy = 1;
  else if (Symmetry == 1)
    lpsy = 2;
  else if (Symmetry == 2)
    lpsy = 8;

  double psi4RR, psi4II;
  for (n = Nmin; n <= Nmax; n++)
  {
    // need round off always
    i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1
    j = n - i * N_phi;

    int countlm = 0;
    // "countlm < NN" keeps every write inside RP_out / IP_out
    for (int pl = spinw; pl < maxl + 1 && countlm < NN; pl++)
      for (int pm = -pl; pm < pl + 1 && countlm < NN; pm++)
      {
        for (int lp = 0; lp < lpsy; lp++)
        {
          switch (lp)
          {
          case 0: //+++ (theta, phi)
            costheta = arcostheta[i];
            cosmphi = cos(pm * (j + 0.5) * dphi);
            sinmphi = sin(pm * (j + 0.5) * dphi);
            psi4RR = shellf[InList * n];
            psi4II = shellf[InList * n + 1];
            break;
          case 1: //++- (pi-theta, phi)
            costheta = -arcostheta[i];
            cosmphi = cos(pm * (j + 0.5) * dphi);
            sinmphi = sin(pm * (j + 0.5) * dphi);
            psi4RR = shellf[InList * n];
            psi4II = -shellf[InList * n + 1];
            break;
          case 2: //+-+ (theta, 2*pi-phi)
            costheta = arcostheta[i];
            cosmphi = cos(pm * (j + 0.5) * dphi);
            sinmphi = -sin(pm * (j + 0.5) * dphi);
            psi4RR = shellf[InList * n];
            psi4II = -shellf[InList * n + 1];
            break;
          case 3: //+-- (pi-theta, 2*pi-phi)
            costheta = -arcostheta[i];
            cosmphi = cos(pm * (j + 0.5) * dphi);
            sinmphi = -sin(pm * (j + 0.5) * dphi);
            psi4RR = shellf[InList * n];
            psi4II = shellf[InList * n + 1];
            break;
          case 4: //-++ (theta, pi-phi)
            costheta = arcostheta[i];
            cosmphi = cos(pm * (PI - (j + 0.5) * dphi));
            sinmphi = sin(pm * (PI - (j + 0.5) * dphi));
            psi4RR = shellf[InList * n];
            psi4II = -shellf[InList * n + 1];
            break;
          case 5: //-+- (pi-theta, pi-phi)
            costheta = -arcostheta[i];
            cosmphi = cos(pm * (PI - (j + 0.5) * dphi));
            sinmphi = sin(pm * (PI - (j + 0.5) * dphi));
            psi4RR = shellf[InList * n];
            psi4II = shellf[InList * n + 1];
            break;
          case 6: //--+ (theta, pi+phi)
            costheta = arcostheta[i];
            cosmphi = cos(pm * (PI + (j + 0.5) * dphi));
            sinmphi = sin(pm * (PI + (j + 0.5) * dphi));
            psi4RR = shellf[InList * n];
            psi4II = shellf[InList * n + 1];
            break;
          case 7: //--- (pi-theta, pi+phi)
            costheta = -arcostheta[i];
            cosmphi = cos(pm * (PI + (j + 0.5) * dphi));
            sinmphi = sin(pm * (PI + (j + 0.5) * dphi));
            psi4RR = shellf[InList * n];
            psi4II = -shellf[InList * n + 1];
            break;
          }

          thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2
                                                                                                       // based on Eq.(41) of PRD 77, 024027 (2008)
#ifdef GaussInt
          // wtcostheta is even function respect costheta
          RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i];
          IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i];
#else
          RP_out[countlm] = RP_out[countlm] + thetap * (psi4RR * cosmphi + psi4II * sinmphi); // + is because \bar of \bar{Y^s_lm} in Eq.(40)
                                                                                              // of PRD 77, 024027 (2008)
          IP_out[countlm] = IP_out[countlm] + thetap * (psi4II * cosmphi - psi4RR * sinmphi);
#endif
        }
        countlm++;
      }
  }

  for (int ii = 0; ii < NN; ii++)
  {
// do not need multiply with rex for null shell
#ifdef GaussInt
    RP_out[ii] = RP_out[ii] * dphi;
    IP_out[ii] = IP_out[ii] * dphi;
#else
    RP_out[ii] = RP_out[ii] * dphi * dcostheta;
    IP_out[ii] = IP_out[ii] * dphi * dcostheta;
#endif
  }
  //|------+ Communicate and sum the results from each processor.

  {
    // pack real and imaginary parts so a single reduction is enough
    double *RPIP_out = new double[2 * NN];
    double *RPIP = new double[2 * NN];
    memcpy(RPIP_out, RP_out, NN * sizeof(double));
    memcpy(RPIP_out + NN, IP_out, NN * sizeof(double));
    MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    memcpy(RP, RPIP, NN * sizeof(double));
    memcpy(IP, RPIP + NN, NN * sizeof(double));
    delete[] RPIP_out;
    delete[] RPIP;
  }

  //|------= Free memory.

  delete[] pox[0];
  delete[] pox[1];
  delete[] pox[2];
  delete[] shellf;
  delete[] RP_out;
  delete[] IP_out;
  DG_List->clearList();
}
+//|---------------------------------------------------- +//| +//| ADM mass, linear momentum and angular momentum +//| +//|---------------------------------------------------- +void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK, + var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, + var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz, + var *Gmx, var *Gmy, var *Gmz, + var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i + double *Rout, monitor *Monitor) +{ + if (myrank == 0 && GH->grids[lev] != 1) + if (Monitor && Monitor->outfile) + Monitor->outfile << "WARNING: surface integral on multipatches" << endl; + else + cout << "WARNING: surface integral on multipatches" << endl; + + double mass, px, py, pz, sx, sy, sz; + + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + MyList *BP = Pp->data->blb; + while (BP) + { + Block *cg = BP->data; + if (myrank == cg->rank) + { + f_admmass_bssn(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[chi->sgfn], cg->fgfs[trK->sgfn], + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], + cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], 
cg->fgfs[Sfz_rhs->sgfn], + Symmetry); + } + if (BP == Pp->data->ble) + break; + BP = BP->next; + } + Pp = Pp->next; + } + + const int InList = 17; + + MyList *DG_List = new MyList(Sfx_rhs); + DG_List->insert(Sfy_rhs); + DG_List->insert(Sfz_rhs); + DG_List->insert(chi); + DG_List->insert(trK); + DG_List->insert(gxx); + DG_List->insert(gxy); + DG_List->insert(gxz); + DG_List->insert(gyy); + DG_List->insert(gyz); + DG_List->insert(gzz); + DG_List->insert(Axx); + DG_List->insert(Axy); + DG_List->insert(Axz); + DG_List->insert(Ayy); + DG_List->insert(Ayz); + DG_List->insert(Azz); + + int n; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex * nz_g[n]; + } + + int mp, Lp, Nmin, Nmax; + mp = n_tot / cpusize; + Lp = n_tot - cpusize * mp; + if (Lp > myrank) + { + Nmin = myrank * mp + myrank; + Nmax = Nmin + mp; + } + else + { + Nmin = myrank * mp + Lp; + Nmax = Nmin + mp - 1; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + // we have assumed there is only one box on this level, + // so we do not need loop boxes + GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Nmin, Nmax); + + double Mass_out = 0; + double ang_outx, ang_outy, ang_outz; + double p_outx, p_outy, p_outz; + ang_outx = ang_outy = ang_outz = 0.0; + p_outx = p_outy = p_outz = 0.0; + const double f1o8 = 0.125; + + double Chi, Psi; + double Gxx, Gxy, Gxz, Gyy, Gyz, Gzz; + double gupxx, gupxy, gupxz, gupyy, gupyz, gupzz; + double TRK, axx, axy, axz, ayy, ayz, azz; + double aupxx, aupxy, aupxz, aupyx, aupyy, aupyz, aupzx, aupzy, aupzz; + int i; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + + Chi = shellf[InList * n + 3]; // chi in fact + TRK = shellf[InList * n + 4]; + Gxx = shellf[InList * n + 5] + 1.0; + Gxy = shellf[InList * n + 6]; + Gxz = shellf[InList * n + 
7]; + Gyy = shellf[InList * n + 8] + 1.0; + Gyz = shellf[InList * n + 9]; + Gzz = shellf[InList * n + 10] + 1.0; + axx = shellf[InList * n + 11]; + axy = shellf[InList * n + 12]; + axz = shellf[InList * n + 13]; + ayy = shellf[InList * n + 14]; + ayz = shellf[InList * n + 15]; + azz = shellf[InList * n + 16]; + + Chi = 1.0 / (1.0 + Chi); // exp(4*phi) + Psi = Chi * sqrt(Chi); // Psi^6 + +// Chi^2 corresponds to metric determinant +// but this factor has been considered in f_admmass_bssn +#ifdef GaussInt + // wtcostheta is even function respect costheta + Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + shellf[InList * n + 2] * nz_g[n]) * wtcostheta[i]; +#else + Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + shellf[InList * n + 2] * nz_g[n]); +#endif + + gupzz = Gxx * Gyy * Gzz + Gxy * Gyz * Gxz + Gxz * Gxy * Gyz - + Gxz * Gyy * Gxz - Gxy * Gxy * Gzz - Gxx * Gyz * Gyz; + gupxx = (Gyy * Gzz - Gyz * Gyz) / gupzz; + gupxy = -(Gxy * Gzz - Gyz * Gxz) / gupzz; + gupxz = (Gxy * Gyz - Gyy * Gxz) / gupzz; + gupyy = (Gxx * Gzz - Gxz * Gxz) / gupzz; + gupyz = -(Gxx * Gyz - Gxy * Gxz) / gupzz; + gupzz = (Gxx * Gyy - Gxy * Gxy) / gupzz; + + aupxx = gupxx * axx + gupxy * axy + gupxz * axz; + aupxy = gupxx * axy + gupxy * ayy + gupxz * ayz; + aupxz = gupxx * axz + gupxy * ayz + gupxz * azz; + aupyx = gupxy * axx + gupyy * axy + gupyz * axz; + aupyy = gupxy * axy + gupyy * ayy + gupyz * ayz; + aupyz = gupxy * axz + gupyy * ayz + gupyz * azz; + aupzx = gupxz * axx + gupyz * axy + gupzz * axz; + aupzy = gupxz * axy + gupyz * ayy + gupzz * ayz; + aupzz = gupxz * axz + gupyz * ayz + gupzz * azz; + if (Symmetry == 0) + { +#ifdef GaussInt + // wtcostheta is even function respect costheta + // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m + ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - 
pox[2][n] * aupzy)) * wtcostheta[i]; + // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m + ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)) * wtcostheta[i]; + // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m + ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; +#else + // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m + ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - pox[2][n] * aupzy)); + // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m + ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)); + // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m + ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); +#endif + } + else if (Symmetry == 1) + { +#ifdef GaussInt + ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; +#else + ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); +#endif + } + + axx = Chi * (axx + Gxx * TRK / 3.0); + axy = Chi * (axy + Gxy * TRK / 3.0); + axz = Chi * (axz + Gxz * TRK / 3.0); + ayy = Chi * (ayy + Gyy * TRK / 3.0); + ayz = Chi * (ayz + Gyz * TRK / 3.0); + azz = Chi * (azz + Gzz * TRK / 
3.0); + + axx = axx - TRK; + ayy = ayy - TRK; + azz = azz - TRK; + + // 1/8\pi \int \psi^6 (K_mi - \delta_mi trK) dS^m: lower index linear momentum + if (Symmetry == 0) + { +#ifdef GaussInt + p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; + p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; + p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz) * wtcostheta[i]; +#else + p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); + p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); + p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz); +#endif + } + else if (Symmetry == 1) + { +#ifdef GaussInt + p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; + p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; +#else + p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); + p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); +#endif + } + } + + { + double scalar_out[7] = {Mass_out, ang_outx, ang_outy, ang_outz, p_outx, p_outy, p_outz}; + double scalar_in[7]; + MPI_Allreduce(scalar_out, scalar_in, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + mass = scalar_in[0]; sx = scalar_in[1]; sy = scalar_in[2]; sz = scalar_in[3]; + px = scalar_in[4]; py = scalar_in[5]; pz = scalar_in[6]; + } + +#ifdef GaussInt + mass = mass * rex * rex * dphi * factor; + + sx = sx * rex * rex * dphi * (1.0 / PI) * factor; + sy = sy * rex * rex * dphi * (1.0 / PI) * factor; + sz = sz * rex * rex * dphi * (1.0 / PI) * factor; + + px = px * rex * rex * dphi * (1.0 / PI) * factor; + py = py * rex * rex * dphi * (1.0 / PI) * factor; + pz = pz * rex * rex * dphi * (1.0 / PI) * factor; +#else + mass = mass * rex * rex * dphi * dcostheta * factor; + + sx = sx * rex * rex * dphi * 
dcostheta * (1.0 / PI) * factor; + sy = sy * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; + sz = sz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; + + px = px * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; + py = py * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; + pz = pz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; +#endif + + Rout[0] = mass; + Rout[1] = px; + Rout[2] = py; + Rout[3] = pz; + Rout[4] = sx; + Rout[5] = sy; + Rout[6] = sz; + + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + DG_List->clearList(); +}
// ADM mass, linear momentum and angular momentum surface integrals on the
// sphere of coordinate radius rex, with the work split over and reduced on
// the communicator Comm_here.
//
// rex        : radius of the extraction sphere (sample points are rex * n_g)
// lev        : grid level; GH->PatL[lev] supplies the blocks and the interpolation
// GH         : grid hierarchy
// chi .. Azz : conformal factor variable, trK, metric deviation g_ij - delta_ij
//              (1.0 is added to the diagonal below) and A_ij
// Gmx..Gmz   : only passed on to f_admmass_bssn
// Sf*_rhs    : scratch grid functions handed to f_admmass_bssn; their
//              interpolated values are dotted with the surface normal to form
//              the mass integrand
// Rout       : output, Rout[0] = mass, Rout[1..3] = linear momentum (x,y,z),
//              Rout[4..6] = angular momentum (x,y,z)
// Monitor    : optional log target for the multipatch warning
// Comm_here  : communicator used for rank/size, interpolation and reduction
void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var *trK,
                                     var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
                                     var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
                                     var *Gmx, var *Gmy, var *Gmz,
                                     var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temporary memory for mass^i
                                     double *Rout, monitor *Monitor, MPI_Comm Comm_here)
{
  int rank_here, size_here;
  MPI_Comm_rank(Comm_here, &rank_here);
  MPI_Comm_size(Comm_here, &size_here);

  if (rank_here == 0 && GH->grids[lev] != 1)
  {
    if (Monitor && Monitor->outfile)
      Monitor->outfile << "WARNING: surface integral on multipatches" << endl;
    else
      cout << "WARNING: surface integral on multipatches" << endl;
  }

  // Run f_admmass_bssn on every block of this level owned by this process.
  // NOTE(review): ownership is tested with the class-wide myrank, not with the
  // rank inside Comm_here -- presumably cg->rank is a global rank; confirm.
  for (MyList *patch = GH->PatL[lev]; patch; patch = patch->next)
  {
    for (MyList *blk = patch->data->blb; blk; blk = blk->next)
    {
      Block *cg = blk->data;
      if (myrank == cg->rank)
        f_admmass_bssn(cg->shape, cg->X[0], cg->X[1], cg->X[2],
                       cg->fgfs[chi->sgfn], cg->fgfs[trK->sgfn],
                       cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
                       cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn],
                       cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn],
                       cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn],
                       Symmetry);
      if (blk == patch->data->ble) // last block of this patch
        break;
    }
  }

  // Interpolation list; the insertion order fixes the slot of each field in shellf.
  const int InList = 17;
  MyList *DG_List = new MyList(Sfx_rhs);
  var *rest[InList - 1] = {Sfy_rhs, Sfz_rhs, chi, trK,
                           gxx, gxy, gxz, gyy, gyz, gzz,
                           Axx, Axy, Axz, Ayy, Ayz, Azz};
  for (int k = 0; k < InList - 1; k++)
    DG_List->insert(rest[k]);

  // sample points on the sphere
  double *pox[3];
  for (int d = 0; d < 3; d++)
    pox[d] = new double[n_tot];
  for (int q = 0; q < n_tot; q++)
  {
    pox[0][q] = rex * nx_g[q];
    pox[1][q] = rex * ny_g[q];
    pox[2][q] = rex * nz_g[q];
  }

  double *shellf = new double[n_tot * InList];

  // we have assumed there is only one box on this level,
  // so we do not need loop boxes
  GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Comm_here);

  // Contiguous share [Nmin, Nmax] of the points for this rank; the first Lp
  // ranks take one extra point.
  const int mp = n_tot / size_here;
  const int Lp = n_tot - size_here * mp;
  const int Nmin = rank_here * mp + (Lp > rank_here ? rank_here : Lp);
  const int Nmax = Nmin + mp - (Lp > rank_here ? 0 : 1);

  // local partial sums: [0] mass, [1..3] angular momentum x,y,z, [4..6] linear momentum x,y,z
  double part[7] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  const double f1o8 = 0.125;

  for (int n = Nmin; n <= Nmax; n++)
  {
    const double *f = shellf + InList * n; // the 17 interpolated values at point n
    const double nx = nx_g[n], ny = ny_g[n], nz = nz_g[n];
    const double x = pox[0][n], y = pox[1][n], z = pox[2][n];

#ifdef GaussInt
    // need round off always: int(1.723) = 1, int(-1.732) = -1
    // wtcostheta is even function respect costheta
    const double wq = wtcostheta[int(n / N_phi)];
#else
    const double wq = 1.0; // multiplying by 1.0 is exact, so the sums are unchanged
#endif

    const double TRK = f[4];
    const double Gxx = f[5] + 1.0, Gxy = f[6], Gxz = f[7];
    const double Gyy = f[8] + 1.0, Gyz = f[9], Gzz = f[10] + 1.0;
    double axx = f[11], axy = f[12], axz = f[13];
    double ayy = f[14], ayz = f[15], azz = f[16];

    const double Chi = 1.0 / (1.0 + f[3]); // exp(4*phi)
    const double Psi = Chi * sqrt(Chi);    // Psi^6

    // Chi^2 corresponds to metric determinant
    // but this factor has been considered in f_admmass_bssn
    part[0] += (f[0] * nx + f[1] * ny + f[2] * nz) * wq;

    // inverse of the 3x3 symmetric matrix G via its determinant
    const double det = Gxx * Gyy * Gzz + Gxy * Gyz * Gxz + Gxz * Gxy * Gyz -
                       Gxz * Gyy * Gxz - Gxy * Gxy * Gzz - Gxx * Gyz * Gyz;
    const double gupxx = (Gyy * Gzz - Gyz * Gyz) / det;
    const double gupxy = -(Gxy * Gzz - Gyz * Gxz) / det;
    const double gupxz = (Gxy * Gyz - Gyy * Gxz) / det;
    const double gupyy = (Gxx * Gzz - Gxz * Gxz) / det;
    const double gupyz = -(Gxx * Gyz - Gxy * Gxz) / det;
    const double gupzz = (Gxx * Gyy - Gxy * Gxy) / det;

    // first index raised: aup_ab = sum_c gup_ac * a_cb
    const double aupxx = gupxx * axx + gupxy * axy + gupxz * axz;
    const double aupxy = gupxx * axy + gupxy * ayy + gupxz * ayz;
    const double aupxz = gupxx * axz + gupxy * ayz + gupxz * azz;
    const double aupyx = gupxy * axx + gupyy * axy + gupyz * axz;
    const double aupyy = gupxy * axy + gupyy * ayy + gupyz * ayz;
    const double aupyz = gupxy * axz + gupyy * ayz + gupyz * azz;
    const double aupzx = gupxz * axx + gupyz * axy + gupzz * axz;
    const double aupzy = gupxz * axy + gupyz * ayy + gupzz * ayz;
    const double aupzz = gupxz * axz + gupyz * ayz + gupzz * azz;

    if (Symmetry == 0)
    {
      // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m
      part[1] += f1o8 * Psi * (nx * (y * aupxz - z * aupxy) + ny * (y * aupyz - z * aupyy) + nz * (y * aupzz - z * aupzy)) * wq;
      // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m
      part[2] += f1o8 * Psi * (nx * (z * aupxx - x * aupxz) + ny * (z * aupyx - x * aupyz) + nz * (z * aupzx - x * aupzz)) * wq;
    }
    if (Symmetry == 0 || Symmetry == 1)
    {
      // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m
      part[3] += f1o8 * Psi * (nx * (x * aupxy - y * aupxx) + ny * (x * aupyy - y * aupyx) + nz * (x * aupzy - y * aupzx)) * wq;
    }

    // rebuild K_ij from A_ij and trK, then subtract trK on the diagonal
    axx = Chi * (axx + Gxx * TRK / 3.0);
    axy = Chi * (axy + Gxy * TRK / 3.0);
    axz = Chi * (axz + Gxz * TRK / 3.0);
    ayy = Chi * (ayy + Gyy * TRK / 3.0);
    ayz = Chi * (ayz + Gyz * TRK / 3.0);
    azz = Chi * (azz + Gzz * TRK / 3.0);

    axx = axx - TRK;
    ayy = ayy - TRK;
    azz = azz - TRK;

    // 1/8\pi \int \psi^6 (K_mi - \delta_mi trK) dS^m: lower index linear momentum
    if (Symmetry == 0 || Symmetry == 1)
    {
      part[4] += f1o8 * Psi * (nx * axx + ny * axy + nz * axz) * wq;
      part[5] += f1o8 * Psi * (nx * axy + ny * ayy + nz * ayz) * wq;
    }
    if (Symmetry == 0)
      part[6] += f1o8 * Psi * (nx * axz + ny * ayz + nz * azz) * wq;
  }

  // sum the seven partial results over Comm_here
  double total[7];
  MPI_Allreduce(part, total, 7, MPI_DOUBLE, MPI_SUM, Comm_here);

#ifdef GaussInt
  const double dmu = 1.0; // the theta weight was already applied per point
#else
  const double dmu = dcostheta;
#endif

  Rout[0] = total[0] * rex * rex * dphi * dmu * factor;
  for (int k = 0; k < 3; k++)
  {
    Rout[1 + k] = total[4 + k] * rex * rex * dphi * dmu * (1.0 / PI) * factor; // linear momentum
    Rout[4 + k] = total[1 + k] * rex * rex * dphi * dmu * (1.0 / PI) * factor; // angular momentum
  }

  delete[] pox[0];
  delete[] pox[1];
  delete[] pox[2];
  delete[] shellf;
  DG_List->clearList();
}
+//|---------------------------------------------------------------- +// for shell patch +//|---------------------------------------------------------------- +void surface_integral::surf_MassPAng(double rex, int lev, ShellPatch *GH, var *chi, var *trK, + var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, + var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz, + var *Gmx, var *Gmy, var *Gmz, + var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i + double *Rout, monitor *Monitor) +{ + if (lev != 0) + { + if (myrank == 0) + { + if (Monitor && Monitor->outfile) + Monitor->outfile << "WARNING: shell surface integral not on level 0" << endl; + else + cout << "WARNING: shell surface integral not on level 0" << endl; + } + return; + } + + double mass, px, py, pz, sx, sy, sz; + + MyList *Pp = GH->PatL; + while (Pp) + { + MyList *BL = Pp->data->blb; + int fngfs = Pp->data->fngfs; + while (BL) + { + Block *cg = BL->data; + if (myrank == cg->rank) + { + f_admmass_bssn_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], + cg->fgfs[fngfs + ShellPatch::gx], cg->fgfs[fngfs + ShellPatch::gy], cg->fgfs[fngfs + ShellPatch::gz], + cg->fgfs[fngfs + ShellPatch::drhodx], cg->fgfs[fngfs + ShellPatch::drhody], cg->fgfs[fngfs + ShellPatch::drhodz], + cg->fgfs[fngfs + ShellPatch::dsigmadx], cg->fgfs[fngfs + ShellPatch::dsigmady], cg->fgfs[fngfs + ShellPatch::dsigmadz], + cg->fgfs[fngfs + ShellPatch::dRdx], 
cg->fgfs[fngfs + ShellPatch::dRdy], cg->fgfs[fngfs + ShellPatch::dRdz], + cg->fgfs[fngfs + ShellPatch::drhodxx], cg->fgfs[fngfs + ShellPatch::drhodxy], cg->fgfs[fngfs + ShellPatch::drhodxz], + cg->fgfs[fngfs + ShellPatch::drhodyy], cg->fgfs[fngfs + ShellPatch::drhodyz], cg->fgfs[fngfs + ShellPatch::drhodzz], + cg->fgfs[fngfs + ShellPatch::dsigmadxx], cg->fgfs[fngfs + ShellPatch::dsigmadxy], cg->fgfs[fngfs + ShellPatch::dsigmadxz], + cg->fgfs[fngfs + ShellPatch::dsigmadyy], cg->fgfs[fngfs + ShellPatch::dsigmadyz], cg->fgfs[fngfs + ShellPatch::dsigmadzz], + cg->fgfs[fngfs + ShellPatch::dRdxx], cg->fgfs[fngfs + ShellPatch::dRdxy], cg->fgfs[fngfs + ShellPatch::dRdxz], + cg->fgfs[fngfs + ShellPatch::dRdyy], cg->fgfs[fngfs + ShellPatch::dRdyz], cg->fgfs[fngfs + ShellPatch::dRdzz], + cg->fgfs[chi->sgfn], cg->fgfs[trK->sgfn], + cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], + cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], + cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], + cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], + Symmetry, Pp->data->sst); + } + if (BL == Pp->data->ble) + break; + BL = BL->next; + } + Pp = Pp->next; + } + + const int InList = 17; + + MyList *DG_List = new MyList(Sfx_rhs); + DG_List->insert(Sfy_rhs); + DG_List->insert(Sfz_rhs); + DG_List->insert(chi); + DG_List->insert(trK); + DG_List->insert(gxx); + DG_List->insert(gxy); + DG_List->insert(gxz); + DG_List->insert(gyy); + DG_List->insert(gyz); + DG_List->insert(gzz); + DG_List->insert(Axx); + DG_List->insert(Axy); + DG_List->insert(Axz); + DG_List->insert(Ayy); + DG_List->insert(Ayz); + DG_List->insert(Azz); + + int n; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex 
* nz_g[n]; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + // we have assumed there is only one box on this level, + // so we do not need loop boxes + GH->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry); + + double Mass_out = 0; + double ang_outx, ang_outy, ang_outz; + double p_outx, p_outy, p_outz; + ang_outx = ang_outy = ang_outz = 0.0; + p_outx = p_outy = p_outz = 0.0; + const double f1o8 = 0.125; + + int mp, Lp, Nmin, Nmax; + + mp = n_tot / cpusize; + Lp = n_tot - cpusize * mp; + + if (Lp > myrank) + { + Nmin = myrank * mp + myrank; + Nmax = Nmin + mp; + } + else + { + Nmin = myrank * mp + Lp; + Nmax = Nmin + mp - 1; + } + + double Chi, Psi; + double Gxx, Gxy, Gxz, Gyy, Gyz, Gzz; + double gupxx, gupxy, gupxz, gupyy, gupyz, gupzz; + double TRK, axx, axy, axz, ayy, ayz, azz; + double aupxx, aupxy, aupxz, aupyx, aupyy, aupyz, aupzx, aupzy, aupzz; + int i; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + + Chi = shellf[InList * n + 3]; // chi in fact + TRK = shellf[InList * n + 4]; + Gxx = shellf[InList * n + 5] + 1.0; + Gxy = shellf[InList * n + 6]; + Gxz = shellf[InList * n + 7]; + Gyy = shellf[InList * n + 8] + 1.0; + Gyz = shellf[InList * n + 9]; + Gzz = shellf[InList * n + 10] + 1.0; + axx = shellf[InList * n + 11]; + axy = shellf[InList * n + 12]; + axz = shellf[InList * n + 13]; + ayy = shellf[InList * n + 14]; + ayz = shellf[InList * n + 15]; + azz = shellf[InList * n + 16]; + + Chi = 1.0 / (1.0 + Chi); // exp(4*phi) + Psi = Chi * sqrt(Chi); // Psi^6 +// Chi^2 corresponds to metric determinant +// but this factor has been considered in f_admmass_bssn +#ifdef GaussInt + // wtcostheta is even function respect costheta + Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + shellf[InList * n + 2] * nz_g[n]) * wtcostheta[i]; +#else + Mass_out = Mass_out + (shellf[InList * n] * nx_g[n] + shellf[InList * n + 1] * ny_g[n] + 
shellf[InList * n + 2] * nz_g[n]); +#endif + + gupzz = Gxx * Gyy * Gzz + Gxy * Gyz * Gxz + Gxz * Gxy * Gyz - + Gxz * Gyy * Gxz - Gxy * Gxy * Gzz - Gxx * Gyz * Gyz; + gupxx = (Gyy * Gzz - Gyz * Gyz) / gupzz; + gupxy = -(Gxy * Gzz - Gyz * Gxz) / gupzz; + gupxz = (Gxy * Gyz - Gyy * Gxz) / gupzz; + gupyy = (Gxx * Gzz - Gxz * Gxz) / gupzz; + gupyz = -(Gxx * Gyz - Gxy * Gxz) / gupzz; + gupzz = (Gxx * Gyy - Gxy * Gxy) / gupzz; + + aupxx = gupxx * axx + gupxy * axy + gupxz * axz; + aupxy = gupxx * axy + gupxy * ayy + gupxz * ayz; + aupxz = gupxx * axz + gupxy * ayz + gupxz * azz; + aupyx = gupxy * axx + gupyy * axy + gupyz * axz; + aupyy = gupxy * axy + gupyy * ayy + gupyz * ayz; + aupyz = gupxy * axz + gupyy * ayz + gupyz * azz; + aupzx = gupxz * axx + gupyz * axy + gupzz * axz; + aupzy = gupxz * axy + gupyz * ayy + gupzz * ayz; + aupzz = gupxz * axz + gupyz * ayz + gupzz * azz; + if (Symmetry == 0) + { +#ifdef GaussInt + // wtcostheta is even function respect costheta + // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m + ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - pox[2][n] * aupzy)) * wtcostheta[i]; + // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m + ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)) * wtcostheta[i]; + // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m + ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; +#else + // 1/8\pi \int \psi^6 (y A^m_z - zA^m_y) dS_m + ang_outx = ang_outx + f1o8 * Psi * (nx_g[n] * (pox[1][n] * aupxz - pox[2][n] * aupxy) + ny_g[n] * (pox[1][n] * aupyz - pox[2][n] * aupyy) + nz_g[n] * (pox[1][n] * aupzz - pox[2][n] * aupzy)); 
+ // 1/8\pi \int \psi^6 (z A^m_x - xA^m_z) dS_m + ang_outy = ang_outy + f1o8 * Psi * (nx_g[n] * (pox[2][n] * aupxx - pox[0][n] * aupxz) + ny_g[n] * (pox[2][n] * aupyx - pox[0][n] * aupyz) + nz_g[n] * (pox[2][n] * aupzx - pox[0][n] * aupzz)); + // 1/8\pi \int \psi^6 (x A^m_y - yA^m_x) dS_m + ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); +#endif + } + else if (Symmetry == 1) + { +#ifdef GaussInt + ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)) * wtcostheta[i]; +#else + ang_outz = ang_outz + f1o8 * Psi * (nx_g[n] * (pox[0][n] * aupxy - pox[1][n] * aupxx) + ny_g[n] * (pox[0][n] * aupyy - pox[1][n] * aupyx) + nz_g[n] * (pox[0][n] * aupzy - pox[1][n] * aupzx)); +#endif + } + + axx = Chi * (axx + Gxx * TRK / 3.0); + axy = Chi * (axy + Gxy * TRK / 3.0); + axz = Chi * (axz + Gxz * TRK / 3.0); + ayy = Chi * (ayy + Gyy * TRK / 3.0); + ayz = Chi * (ayz + Gyz * TRK / 3.0); + azz = Chi * (azz + Gzz * TRK / 3.0); + + axx = axx - TRK; + ayy = ayy - TRK; + azz = azz - TRK; + + // 1/8\pi \int \psi^6 (K_mi - \delta_mi trK) dS^m: lower index linear momentum + if (Symmetry == 0) + { +#ifdef GaussInt + p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; + p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; + p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz) * wtcostheta[i]; +#else + p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); + p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); + p_outz = p_outz + f1o8 * Psi * (nx_g[n] * axz + ny_g[n] * ayz + nz_g[n] * azz); +#endif + } + else if (Symmetry == 1) + { +#ifdef GaussInt + 
p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz) * wtcostheta[i]; + p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz) * wtcostheta[i]; +#else + p_outx = p_outx + f1o8 * Psi * (nx_g[n] * axx + ny_g[n] * axy + nz_g[n] * axz); + p_outy = p_outy + f1o8 * Psi * (nx_g[n] * axy + ny_g[n] * ayy + nz_g[n] * ayz); +#endif + } + } + + { + double scalar_out[7] = {Mass_out, ang_outx, ang_outy, ang_outz, p_outx, p_outy, p_outz}; + double scalar_in[7]; + MPI_Allreduce(scalar_out, scalar_in, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + mass = scalar_in[0]; sx = scalar_in[1]; sy = scalar_in[2]; sz = scalar_in[3]; + px = scalar_in[4]; py = scalar_in[5]; pz = scalar_in[6]; + } + +#ifdef GaussInt + mass = mass * rex * rex * dphi * factor; + + sx = sx * rex * rex * dphi * (1.0 / PI) * factor; + sy = sy * rex * rex * dphi * (1.0 / PI) * factor; + sz = sz * rex * rex * dphi * (1.0 / PI) * factor; + + px = px * rex * rex * dphi * (1.0 / PI) * factor; + py = py * rex * rex * dphi * (1.0 / PI) * factor; + pz = pz * rex * rex * dphi * (1.0 / PI) * factor; +#else + mass = mass * rex * rex * dphi * dcostheta * factor; + + sx = sx * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; + sy = sy * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; + sz = sz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; + + px = px * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; + py = py * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; + pz = pz * rex * rex * dphi * dcostheta * (1.0 / PI) * factor; +#endif + + Rout[0] = mass; + Rout[1] = px; + Rout[2] = py; + Rout[3] = pz; + Rout[4] = sx; + Rout[5] = sy; + Rout[6] = sz; + + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + DG_List->clearList(); +} +//|---------------------------------------------------------------- +// do not discriminate box and shell +// for Gravitational wave specially symmetric case 
+//|---------------------------------------------------------------- +void surface_integral::surf_Wave(double rex, cgh *GH, ShellPatch *SH, + var *chi, var *trK, + var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz, + var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz, + var *chix, var *chiy, var *chiz, + var *trKx, var *trKy, var *trKz, + var *Axxx, var *Axxy, var *Axxz, + var *Axyx, var *Axyy, var *Axyz, + var *Axzx, var *Axzy, var *Axzz, + var *Ayyx, var *Ayyy, var *Ayyz, + var *Ayzx, var *Ayzy, var *Ayzz, + var *Azzx, var *Azzy, var *Azzz, + var *Gamxxx, var *Gamxxy, var *Gamxxz, var *Gamxyy, var *Gamxyz, var *Gamxzz, + var *Gamyxx, var *Gamyxy, var *Gamyxz, var *Gamyyy, var *Gamyyz, var *Gamyzz, + var *Gamzxx, var *Gamzxy, var *Gamzxz, var *Gamzyy, var *Gamzyz, var *Gamzzz, + var *Rxx, var *Rxy, var *Rxz, var *Ryy, var *Ryz, var *Rzz, + int spinw, int maxl, int NN, double *RP, double *IP, + monitor *Monitor) // NN is the length of RP and IP +{ + const int InList = 62; + + MyList *DG_List = new MyList(chi); + DG_List->insert(trK); + DG_List->insert(gxx); + DG_List->insert(gxy); + DG_List->insert(gxz); + DG_List->insert(gyy); + DG_List->insert(gyz); + DG_List->insert(gzz); + DG_List->insert(Axx); + DG_List->insert(Axy); + DG_List->insert(Axz); + DG_List->insert(Ayy); + DG_List->insert(Ayz); + DG_List->insert(Azz); + DG_List->insert(chix); + DG_List->insert(chiy); + DG_List->insert(chiz); + DG_List->insert(trKx); + DG_List->insert(trKy); + DG_List->insert(trKz); + DG_List->insert(Axxx); + DG_List->insert(Axxy); + DG_List->insert(Axxz); + DG_List->insert(Axyx); + DG_List->insert(Axyy); + DG_List->insert(Axyz); + DG_List->insert(Axzx); + DG_List->insert(Axzy); + DG_List->insert(Axzz); + DG_List->insert(Ayyx); + DG_List->insert(Ayyy); + DG_List->insert(Ayyz); + DG_List->insert(Ayzx); + DG_List->insert(Ayzy); + DG_List->insert(Ayzz); + DG_List->insert(Azzx); + DG_List->insert(Azzy); + DG_List->insert(Azzz); + DG_List->insert(Gamxxx); + 
DG_List->insert(Gamxxy); + DG_List->insert(Gamxxz); + DG_List->insert(Gamxyy); + DG_List->insert(Gamxyz); + DG_List->insert(Gamxzz); + DG_List->insert(Gamyxx); + DG_List->insert(Gamyxy); + DG_List->insert(Gamyxz); + DG_List->insert(Gamyyy); + DG_List->insert(Gamyyz); + DG_List->insert(Gamyzz); + DG_List->insert(Gamzxx); + DG_List->insert(Gamzxy); + DG_List->insert(Gamzxz); + DG_List->insert(Gamzyy); + DG_List->insert(Gamzyz); + DG_List->insert(Gamzzz); + DG_List->insert(Rxx); + DG_List->insert(Rxy); + DG_List->insert(Rxz); + DG_List->insert(Ryy); + DG_List->insert(Ryz); + DG_List->insert(Rzz); + + int n; + double *pox[3]; + for (int i = 0; i < 3; i++) + pox[i] = new double[n_tot]; + for (n = 0; n < n_tot; n++) + { + pox[0][n] = rex * nx_g[n]; + pox[1][n] = rex * ny_g[n]; + pox[2][n] = rex * nz_g[n]; + } + + double *shellf; + shellf = new double[n_tot * InList]; + + SR_Interp_Points(DG_List, GH, SH, n_tot, pox, shellf); + + double *RP_out, *IP_out; + RP_out = new double[NN]; + IP_out = new double[NN]; + + for (int ii = 0; ii < NN; ii++) + { + RP_out[ii] = 0; + IP_out[ii] = 0; + } + + int mp, Lp, Nmin, Nmax; + + mp = n_tot / cpusize; + Lp = n_tot - cpusize * mp; + + if (Lp > myrank) + { + Nmin = myrank * mp + myrank; + Nmax = Nmin + mp; + } + else + { + Nmin = myrank * mp + Lp; + Nmax = Nmin + mp - 1; + } + + // theta part + double costheta, thetap; + double cosmphi, sinmphi; + + int i, j; + int lpsy = 0; + if (Symmetry == 0) + lpsy = 1; + else if (Symmetry == 1) + lpsy = 2; + else if (Symmetry == 2) + lpsy = 8; + + double psi4RR, psi4II; + double px, py, pz; + double pchi, ptrK, pgxx, pgxy, pgxz, pgyy, pgyz, pgzz; + double pAxx, pAxy, pAxz, pAyy, pAyz, pAzz; + double pchix, pchiy, pchiz; + double ptrKx, ptrKy, ptrKz; + double pAxxx, pAxxy, pAxxz; + double pAxyx, pAxyy, pAxyz; + double pAxzx, pAxzy, pAxzz; + double pAyyx, pAyyy, pAyyz; + double pAyzx, pAyzy, pAyzz; + double pAzzx, pAzzy, pAzzz; + double pGamxxx, pGamxxy, pGamxxz, pGamxyy, pGamxyz, pGamxzz; + double 
pGamyxx, pGamyxy, pGamyxz, pGamyyy, pGamyyz, pGamyzz; + double pGamzxx, pGamzxy, pGamzxz, pGamzyy, pGamzyz, pGamzzz; + double pRxx, pRxy, pRxz, pRyy, pRyz, pRzz; + for (n = Nmin; n <= Nmax; n++) + { + // need round off always + i = int(n / N_phi); // int(1.723) = 1, int(-1.732) = -1 + j = n - i * N_phi; + + int countlm = 0; + for (int pl = spinw; pl < maxl + 1; pl++) + for (int pm = -pl; pm < pl + 1; pm++) + { + for (int lp = 0; lp < lpsy; lp++) + { + px = pox[0][n]; + py = pox[1][n]; + pz = pox[2][n]; + pchi = shellf[InList * n]; + ptrK = shellf[InList * n + 1]; + pgxx = shellf[InList * n + 2]; + pgxy = shellf[InList * n + 3]; + pgxz = shellf[InList * n + 4]; + pgyy = shellf[InList * n + 5]; + pgyz = shellf[InList * n + 6]; + pgzz = shellf[InList * n + 7]; + pAxx = shellf[InList * n + 8]; + pAxy = shellf[InList * n + 9]; + pAxz = shellf[InList * n + 10]; + pAyy = shellf[InList * n + 11]; + pAyz = shellf[InList * n + 12]; + pAzz = shellf[InList * n + 13]; + pchix = shellf[InList * n + 14]; + pchiy = shellf[InList * n + 15]; + pchiz = shellf[InList * n + 16]; + ptrKx = shellf[InList * n + 17]; + ptrKy = shellf[InList * n + 18]; + ptrKz = shellf[InList * n + 19]; + pAxxx = shellf[InList * n + 20]; + pAxxy = shellf[InList * n + 21]; + pAxxz = shellf[InList * n + 22]; + pAxyx = shellf[InList * n + 23]; + pAxyy = shellf[InList * n + 24]; + pAxyz = shellf[InList * n + 25]; + pAxzx = shellf[InList * n + 26]; + pAxzy = shellf[InList * n + 27]; + pAxzz = shellf[InList * n + 28]; + pAyyx = shellf[InList * n + 29]; + pAyyy = shellf[InList * n + 30]; + pAyyz = shellf[InList * n + 31]; + pAyzx = shellf[InList * n + 32]; + pAyzy = shellf[InList * n + 33]; + pAyzz = shellf[InList * n + 34]; + pAzzx = shellf[InList * n + 35]; + pAzzy = shellf[InList * n + 36]; + pAzzz = shellf[InList * n + 37]; + pGamxxx = shellf[InList * n + 38]; + pGamxxy = shellf[InList * n + 39]; + pGamxxz = shellf[InList * n + 40]; + pGamxyy = shellf[InList * n + 41]; + pGamxyz = shellf[InList * n + 42]; + 
pGamxzz = shellf[InList * n + 43]; + pGamyxx = shellf[InList * n + 44]; + pGamyxy = shellf[InList * n + 45]; + pGamyxz = shellf[InList * n + 46]; + pGamyyy = shellf[InList * n + 47]; + pGamyyz = shellf[InList * n + 48]; + pGamyzz = shellf[InList * n + 49]; + pGamzxx = shellf[InList * n + 50]; + pGamzxy = shellf[InList * n + 51]; + pGamzxz = shellf[InList * n + 52]; + pGamzyy = shellf[InList * n + 53]; + pGamzyz = shellf[InList * n + 54]; + pGamzzz = shellf[InList * n + 55]; + pRxx = shellf[InList * n + 56]; + pRxy = shellf[InList * n + 57]; + pRxz = shellf[InList * n + 58]; + pRyy = shellf[InList * n + 59]; + pRyz = shellf[InList * n + 60]; + pRzz = shellf[InList * n + 61]; + switch (lp) + { + case 0: //+++ (theta, phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + break; + case 1: //++- (pi-theta, phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = sin(pm * (j + 0.5) * dphi); + pz = -pz; + pgxz = -pgxz; + pgyz = -pgyz; + pAxz = -pAxz; + pAyz = -pAyz; + pchiz = -pchiz; + ptrKz = -ptrKz; + pAxxz = -pAxxz; + pAxyz = -pAxyz; + pAxzx = -pAxzx; + pAxzy = -pAxzy; + pAyyz = -pAyyz; + pAyzx = -pAyzx; + pAyzy = -pAyzy; + pAzzz = -pAzzz; + pGamxxz = -pGamxxz; + pGamxyz = -pGamxyz; + pGamyxz = -pGamyxz; + pGamyyz = -pGamyyz; + pGamzxx = -pGamzxx; + pGamzxy = -pGamzxy; + pGamzyy = -pGamzyy; + pGamzzz = -pGamzzz; + pRxz = -pRxz; + pRyz = -pRyz; + break; + case 2: //+-+ (theta, 2*pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + py = -py; + pgxy = -pgxy; + pgyz = -pgyz; + pAxy = -pAxy; + pAyz = -pAyz; + pchiy = -pchiy; + ptrKy = -ptrKy; + pAxxy = -pAxxy; + pAxyx = -pAxyx; + pAxyz = -pAxyz; + pAxzy = -pAxzy; + pAyyy = -pAyyy; + pAyzx = -pAyzx; + pAyzz = -pAyzz; + pAzzy = -pAzzy; + pGamxxy = -pGamxxy; + pGamxyz = -pGamxyz; + pGamyxx = -pGamyxx; + pGamyxz = -pGamyxz; + pGamyyy = -pGamyyy; + pGamyzz = -pGamyzz; + 
pGamzxy = -pGamzxy; + pGamzyz = -pGamzyz; + pRxy = -pRxy; + pRyz = -pRyz; + break; + case 3: //+-- (pi-theta, 2*pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (j + 0.5) * dphi); + sinmphi = -sin(pm * (j + 0.5) * dphi); + py = -py; + pz = -pz; + pgxy = -pgxy; + pgxz = -pgxz; + pAxy = -pAxy; + pAxz = -pAxz; + pchiy = -pchiy; + pchiz = -pchiz; + ptrKy = -ptrKy; + ptrKz = -ptrKz; + pAxxy = -pAxxy; + pAxxz = -pAxxz; + pAxyx = -pAxyx; + pAxzx = -pAxzx; + pAyyy = -pAyyy; + pAyyz = -pAyyz; + pAyzy = -pAyzy; + pAyzz = -pAyzz; + pAzzy = -pAzzy; + pAzzz = -pAzzz; + pGamxxy = -pGamxxy; + pGamxxz = -pGamxxz; + pGamyxx = -pGamyxx; + pGamyyy = -pGamyyy; + pGamyyz = -pGamyyz; + pGamyzz = -pGamyzz; + pGamzxx = -pGamzxx; + pGamzyy = -pGamzyy; + pGamzyz = -pGamzyz; + pGamzzz = -pGamzzz; + pRxy = -pRxy; + pRxz = -pRxz; + break; + case 4: //-++ (theta, pi-phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + px = -px; + pgxy = -pgxy; + pgxz = -pgxz; + pAxy = -pAxy; + pAxz = -pAxz; + pchix = -pchix; + ptrKx = -ptrKx; + pAxxx = -pAxxx; + pAxyy = -pAxyy; + pAxyz = -pAxyz; + pAxzy = -pAxzy; + pAxzz = -pAxzz; + pAyyx = -pAyyx; + pAyzx = -pAyzx; + pAzzx = -pAzzx; + pGamxxx = -pGamxxx; + pGamxyy = -pGamxyy; + pGamxyz = -pGamxyz; + pGamxzz = -pGamxzz; + pGamyxy = -pGamyxy; + pGamyxz = -pGamyxz; + pGamzxy = -pGamzxy; + pGamzxz = -pGamzxz; + pRxy = -pRxy; + pRxz = -pRxz; + break; + case 5: //-+- (pi-theta, pi-phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI - (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI - (j + 0.5) * dphi)); + px = -px; + pz = -pz; + pgxy = -pgxy; + pgyz = -pgyz; + pAxy = -pAxy; + pAyz = -pAyz; + pchix = -pchix; + pchiz = -pchiz; + ptrKx = -ptrKx; + ptrKz = -ptrKz; + pAxxx = -pAxxx; + pAxxz = -pAxxz; + pAxyy = -pAxyy; + pAxzx = -pAxzx; + pAxzz = -pAxzz; + pAyyx = -pAyyx; + pAyyz = -pAyyz; + pAyzy = -pAyzy; + pAzzx = -pAzzx; + pAzzz = -pAzzz; + pGamxxx = -pGamxxx; + pGamxxz = -pGamxxz; 
+ pGamxyy = -pGamxyy; + pGamxzz = -pGamxzz; + pGamyxy = -pGamyxy; + pGamyyz = -pGamyyz; + pGamzxx = -pGamzxx; + pGamzxz = -pGamzxz; + pGamzyy = -pGamzyy; + pGamzzz = -pGamzzz; + pRxy = -pRxy; + pRyz = -pRyz; + break; + case 6: //--+ (theta, pi+phi) + costheta = arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + px = -px; + py = -py; + pgxz = -pgxz; + pgyz = -pgyz; + pAxz = -pAxz; + pAyz = -pAyz; + pchix = -pchix; + pchiy = -pchiy; + ptrKx = -ptrKx; + ptrKy = -ptrKy; + pAxxx = -pAxxx; + pAxxy = -pAxxy; + pAxyx = -pAxyx; + pAxyy = -pAxyy; + pAxzz = -pAxzz; + pAyyx = -pAyyx; + pAyyy = -pAyyy; + pAyzz = -pAyzz; + pAzzx = -pAzzx; + pAzzy = -pAzzy; + pGamxxx = -pGamxxx; + pGamxxy = -pGamxxy; + pGamxyy = -pGamxyy; + pGamxzz = -pGamxzz; + pGamyxx = -pGamyxx; + pGamyxy = -pGamyxy; + pGamyyy = -pGamyyy; + pGamyzz = -pGamyzz; + pGamzxz = -pGamzxz; + pGamzyz = -pGamzyz; + pRxz = -pRxz; + pRyz = -pRyz; + break; + case 7: //--- (pi-theta, pi+phi) + costheta = -arcostheta[i]; + cosmphi = cos(pm * (PI + (j + 0.5) * dphi)); + sinmphi = sin(pm * (PI + (j + 0.5) * dphi)); + px = -px; + py = -py; + pz = -pz; + pchix = -pchix; + pchiy = -pchiy; + pchiz = -pchiz; + ptrKx = -ptrKx; + ptrKy = -ptrKy; + ptrKz = -ptrKz; + pAxxx = -pAxxx; + pAxxy = -pAxxy; + pAxxz = -pAxxz; + pAxyx = -pAxyx; + pAxyy = -pAxyy; + pAxyz = -pAxyz; + pAxzx = -pAxzx; + pAxzy = -pAxzy; + pAxzz = -pAxzz; + pAyyx = -pAyyx; + pAyyy = -pAyyy; + pAyyz = -pAyyz; + pAyzx = -pAyzx; + pAyzy = -pAyzy; + pAyzz = -pAyzz; + pAzzx = -pAzzx; + pAzzy = -pAzzy; + pAzzz = -pAzzz; + pGamxxx = -pGamxxx; + pGamxxy = -pGamxxy; + pGamxxz = -pGamxxz; + pGamxyy = -pGamxyy; + pGamxyz = -pGamxyz; + pGamxzz = -pGamxzz; + pGamyxx = -pGamyxx; + pGamyxy = -pGamyxy; + pGamyxz = -pGamyxz; + pGamyyy = -pGamyyy; + pGamyyz = -pGamyyz; + pGamyzz = -pGamyzz; + pGamzxx = -pGamzxx; + pGamzxy = -pGamzxy; + pGamzxz = -pGamzxz; + pGamzyy = -pGamzyy; + pGamzyz = -pGamzyz; + pGamzzz = -pGamzzz; + } + 
+ f_getnp4_point(px, py, pz, pchi, ptrK, + pgxx, pgxy, pgxz, pgyy, pgyz, pgzz, + pAxx, pAxy, pAxz, pAyy, pAyz, pAzz, + pchix, pchiy, pchiz, + ptrKx, ptrKy, ptrKz, + pAxxx, pAxxy, pAxxz, + pAxyx, pAxyy, pAxyz, + pAxzx, pAxzy, pAxzz, + pAyyx, pAyyy, pAyyz, + pAyzx, pAyzy, pAyzz, + pAzzx, pAzzy, pAzzz, + pGamxxx, pGamxxy, pGamxxz, pGamxyy, pGamxyz, pGamxzz, + pGamyxx, pGamyxy, pGamyxz, pGamyyy, pGamyyz, pGamyzz, + pGamzxx, pGamzxy, pGamzxz, pGamzyy, pGamzyz, pGamzzz, + pRxx, pRxy, pRxz, pRyy, pRyz, pRzz, + psi4RR, psi4II); + + thetap = sqrt((2 * pl + 1.0) / 4.0 / PI) * misc::Wigner_d_function(pl, pm, spinw, costheta); // note the variation from -2 to 2 + + // find back the one + pchi = pchi + 1; +#ifdef GaussInt + // wtcostheta is even function respect costheta + RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi) * wtcostheta[i]; + IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi) * wtcostheta[i]; +#else + RP_out[countlm] = RP_out[countlm] + thetap / pchi / pchi * (psi4RR * cosmphi + psi4II * sinmphi); + IP_out[countlm] = IP_out[countlm] + thetap / pchi / pchi * (psi4II * cosmphi - psi4RR * sinmphi); +#endif + } + countlm++; // no sanity check for countlm and NN which should be noted in the input parameters + } + } + + for (int ii = 0; ii < NN; ii++) + { +#ifdef GaussInt + RP_out[ii] = RP_out[ii] * rex * dphi; + IP_out[ii] = IP_out[ii] * rex * dphi; +#else + RP_out[ii] = RP_out[ii] * rex * dphi * dcostheta; + IP_out[ii] = IP_out[ii] * rex * dphi * dcostheta; +#endif + } + //|------+ Communicate and sum the results from each processor. 
+ + { + double *RPIP_out = new double[2 * NN]; + double *RPIP = new double[2 * NN]; + memcpy(RPIP_out, RP_out, NN * sizeof(double)); + memcpy(RPIP_out + NN, IP_out, NN * sizeof(double)); + MPI_Allreduce(RPIP_out, RPIP, 2 * NN, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + memcpy(RP, RPIP, NN * sizeof(double)); + memcpy(IP, RPIP + NN, NN * sizeof(double)); + delete[] RPIP_out; + delete[] RPIP; + } + + //|------= Free memory. + + delete[] pox[0]; + delete[] pox[1]; + delete[] pox[2]; + delete[] shellf; + delete[] RP_out; + delete[] IP_out; + DG_List->clearList(); +} +//|---------------------------------------------------------------- +// do not discriminate box and shell +//|---------------------------------------------------------------- +bool surface_integral::SR_Interp_Points(MyList *VarList, cgh *GH, ShellPatch *SH, + int NN, double **XX, double *Shellf) +{ + MyList *varl; + int num_var = 0; + varl = VarList; + while (varl) + { + num_var++; + varl = varl->next; + } + + double pox[3]; + for (int i = 0; i < NN; i++) + { + for (int j = 0; j < 3; j++) + pox[j] = XX[j][i]; + int lev = GH->levels - 1; + bool notfound = true; + + while (notfound) + { + if (lev < 0) + { + if (SH) + { + if (SH->Interp_One_Point(VarList, pox, Shellf + i * num_var, Symmetry)) + { + return true; + } + if (myrank == 0) + cout << "surface_integral::SR_Interp_Points point (" << pox[0] << "," << pox[1] << "," << pox[2] << ") is out of cgh and shell domain!" << endl; + } + else + { + if (myrank == 0) + cout << "surface_integral::SR_Interp_Points: point (" << pox[0] << "," << pox[1] << "," << pox[2] << ") is out of cgh domain!" << endl; + } + return false; + } + MyList *Pp = GH->PatL[lev]; + while (Pp) + { + if (Pp->data->Interp_ONE_Point(VarList, pox, Shellf + i * num_var, Symmetry)) + { + notfound = false; + break; + } + Pp = Pp->next; + } + lev--; + } + } + return true; +}