From b32675ba991e31d63f43ec94deb9424b76372d91 Mon Sep 17 00:00:00 2001 From: jaunatisblue Date: Thu, 12 Feb 2026 03:22:46 +0800 Subject: [PATCH] =?UTF-8?q?=20=201.=20Pass=201=EF=BC=88357-395=E8=A1=8C?= =?UTF-8?q?=EF=BC=89=EF=BC=9A=E9=81=8D=E5=8E=86=E6=89=80=E6=9C=89=20Patch?= =?UTF-8?q?=EF=BC=8C=E5=AF=B9=E6=AF=8F=E4=B8=AA=20block=20=E8=AE=A1?= =?UTF-8?q?=E7=AE=97=E5=90=ABghost=20zone=20=E7=9A=84=E5=AE=9E=E9=99=85?= =?UTF-8?q?=E4=BD=93=E7=A7=AF=EF=BC=8C=E5=AD=98=E5=85=A5=20block=5Fvolumes?= =?UTF-8?q?=20=20=202.=20Greedy=20LPT=EF=BC=88397-414=E8=A1=8C=EF=BC=89?= =?UTF-8?q?=EF=BC=9A=E6=8C=89=E4=BD=93=E7=A7=AF=E4=BB=8E=E5=A4=A7=E5=88=B0?= =?UTF-8?q?=E5=B0=8F=E6=8E=92=E5=BA=8F=EF=BC=8C=E4=BE=9D=E6=AC=A1=E5=88=86?= =?UTF-8?q?=E9=85=8D=E7=BB=99=E5=BD=93=E5=89=8D=E8=B4=9F=E8=BD=BD=E6=9C=80?= =?UTF-8?q?=E5=B0=8F=E7=9A=84=20rank=20=20=203.=20Pass=202=EF=BC=88416-555?= =?UTF-8?q?=E8=A1=8C=EF=BC=89=EF=BC=9A=E5=8E=9F=E6=9D=A5=E7=9A=84=20block?= =?UTF-8?q?=E5=88=9B=E5=BB=BA=E5=BE=AA=E7=8E=AF=EF=BC=8C=E4=BD=86=E7=94=A8?= =?UTF-8?q?=20assigned=5Franks[block=5Fidx++]=20=E6=9B=BF=E4=BB=A3=20n=5Fr?= =?UTF-8?q?ank++=EF=BC=8CBlock=20=20=20=E6=9E=84=E9=80=A0=E6=97=B6?= =?UTF-8?q?=E7=9B=B4=E6=8E=A5=E6=8B=BF=E5=88=B0=E6=AD=A3=E7=A1=AE=E7=9A=84?= =?UTF-8?q?=20rank=EF=BC=8C=E5=86=85=E5=AD=98=E5=88=86=E9=85=8D=E5=9C=A8?= =?UTF-8?q?=E5=AF=B9=E7=9A=84=E8=BF=9B=E7=A8=8B=E4=B8=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AMSS_NCKU_source/Parallel.C | 92 ++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/AMSS_NCKU_source/Parallel.C b/AMSS_NCKU_source/Parallel.C index c0df582..33c0458 100644 --- a/AMSS_NCKU_source/Parallel.C +++ b/AMSS_NCKU_source/Parallel.C @@ -4,6 +4,8 @@ #include "prolongrestrict.h" #include "misc.h" #include "parameters.h" +#include +#include int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion { @@ -352,14 +354,73 @@ MyList *Parallel::distribute(MyList *PatchLIST, int cpusize, int i split_size = Mymax(min_size, block_size / nodes); split_size = Mymax(1, split_size); - int n_rank = 0; + // Pass 1: compute block volumes for greedy rank assignment + std::vector block_volumes; PLi = PatchLIST; int reacpu = 0; while (PLi) { Patch *PP = PLi->data; - reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape); + int ibbox_here[2 * dim]; + for (int i = 0; i < nxyz[0]; i++) + for (int j = 0; j < nxyz[1]; j++) + for (int k = 0; k < nxyz[2]; k++) + { + ibbox_here[0] = (PP->shape[0] * i) / nxyz[0]; + ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1; + ibbox_here[1] = (PP->shape[1] * j) / nxyz[1]; + ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1; + ibbox_here[2] = (PP->shape[2] * k) / nxyz[2]; + ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1; + if (periodic) + { + for (int d = 0; d < dim; d++) { ibbox_here[d] -= ghost_width; ibbox_here[dim + d] += ghost_width; } + } + else + { + ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width); + ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width); + ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width); + ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width); + ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width); + ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width); + } + long vol = 1; + for (int d = 0; d < dim; d++) + vol *= (ibbox_here[dim + d] - ibbox_here[d] + 1); + block_volumes.push_back(vol); + } + PLi = PLi->next; + } + + // Greedy LPT: sort by volume descending, assign each to least-loaded rank + std::vector assigned_ranks(block_volumes.size()); + { + std::vector order(block_volumes.size()); + for (int i = 0; i < (int)order.size(); i++) order[i] = i; + std::sort(order.begin(), order.end(), [&](int a, int b) { + return block_volumes[a] > block_volumes[b]; + }); + std::vector load(cpusize, 0); + for (int idx : order) + { + int min_r = 0; + for (int r = 1; r < cpusize; r++) + if (load[r] < load[min_r]) min_r = r; + assigned_ranks[idx] = min_r; + load[min_r] += block_volumes[idx]; + } + } + + // Pass 2: create blocks with pre-assigned ranks + int block_idx = 0; + PLi = PatchLIST; + while (PLi) + { + Patch *PP = PLi->data; + + partition3(nxyz, split_size, mmin_width, nodes, PP->shape); Block *ng0, *ng; int shape_here[dim], ibbox_here[2 * dim]; @@ -443,10 +504,7 @@ MyList *Parallel::distribute(MyList *PatchLIST, int cpusize, int i int shape_res[dim * pices]; double bbox_res[2 * dim * pices]; misc::dividBlock(dim, shape_here, bbox_here, pices, picef, shape_res, bbox_res, min_width); - ng = ng0 = new Block(dim, shape_res, bbox_res, n_rank++, ingfsi, fngfsi, PP->lev, 0); // delete through KillBlocks - - // if(n_rank==cpusize) {n_rank=0; cerr<<"place one!!"<lev, 0); // delete through KillBlocks // ng->checkBlock(); if (BlL) BlL->insert(ng); @@ -455,22 +513,19 @@ MyList *Parallel::distribute(MyList *PatchLIST, int cpusize, int i for (int i = 1; i < pices; i++) { - ng = new Block(dim, shape_res + i * dim, bbox_res + i * 2 * dim, n_rank++, ingfsi, fngfsi, PP->lev, i); // delete through KillBlocks - // if(n_rank==cpusize) {n_rank=0; cerr<<"place two!! "<lev, i); // delete through KillBlocks // ng->checkBlock(); BlL->insert(ng); } } #else - ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks + ng = ng0 = new Block(dim, shape_here, bbox_here, assigned_ranks[block_idx++], ingfsi, fngfsi, PP->lev); // delete through KillBlocks // ng->checkBlock(); if (BlL) BlL->insert(ng); else BlL = new MyList(ng); // delete through KillBlocks #endif - if (n_rank == cpusize) - n_rank = 0; // set PP->blb if (i == 0 && j == 0 && k == 0) @@ -3524,10 +3579,8 @@ void Parallel::transfer(MyList **src, MyList **src, MyList **src, MyList **src, MyList **src, MyList