修改transfer (modify `Parallel::transfer`: pre-post all MPI_Irecv before MPI_Isend to reduce unexpected-message buffering, replace per-request MPI_Wait with a single MPI_Waitall)

This commit is contained in:
2026-02-12 00:58:18 +08:00
parent 86704100ec
commit 93362baee5

View File

@@ -72,14 +72,14 @@ int Parallel::partition3(int *nxyz, int split_size, int *min_width, int cpusize,
int n;
block_size = shape[0] * shape[1] * shape[2];
n = Mymax(1, (block_size + split_size / 2) / split_size);
n = Mymax(1, (block_size + split_size / 2) / split_size);
maxnx = Mymax(1, shape[0] / min_width[0]);
maxnx = Mymin(cpusize, maxnx);
maxnx = Mymin(cpusize, maxnx);
maxny = Mymax(1, shape[1] / min_width[1]);
maxny = Mymin(cpusize, maxny);
maxny = Mymin(cpusize, maxny);
maxnz = Mymax(1, shape[2] / min_width[2]);
maxnz = Mymin(cpusize, maxnz);
maxnz = Mymin(cpusize, maxnz);
fx = (double)shape[0] / (shape[0] + shape[1] + shape[2]);
fy = (double)shape[1] / (shape[0] + shape[1] + shape[2]);
fz = (double)shape[2] / (shape[0] + shape[1] + shape[2]);
@@ -3504,7 +3504,7 @@ int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyLis
return size_out;
}
//
void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst,
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /*target */,
int Symmetry)
@@ -3512,7 +3512,16 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
int myrank, cpusize;
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
/*
// Early exit: if no gridseg pairs exist for any node, skip all work
{
bool has_segs = false;
for (int n = 0; n < cpusize; n++) {
if (src[n] && dst[n]) { has_segs = true; break; }
}
if (!has_segs) return;
}
*/
int node;
MPI_Request *reqs;
@@ -3527,49 +3536,46 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
int length;
for (node = 0; node < cpusize; node++)
{
send_data[node] = rec_data[node] = 0;
// 第1遍: 本地拷贝 + 所有 Irecv先 post 接收,减少 unexpected message buffering
for (node = 0; node < cpusize; node++)
{
if (node == myrank)
{
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{
rec_data[node] = new double[length];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 1" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
}
}
else
{
// send from this cpu to cpu#node
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{
send_data[node] = new double[length];
if (!send_data[node])
{
cout << "out of memory when new in short transfer, place 2" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
}
// receive from cpu#node to this cpu
if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry))
{
rec_data[node] = new double[length];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 3" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
}
}
}
// wait for all requests to complete
// 第2遍: 所有 Isend
for (node = 0; node < cpusize; node++)
{
if (node == myrank) continue;
// send from this cpu to cpu#node
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{
send_data[node] = new double[length];
data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
}
}
// 一次性等待所有通信完成
//for (int i = 0; i < req_no; i++)
//MPI_Wait(&reqs[i], &stats[i]);
MPI_Waitall(req_no, reqs, stats);
for (node = 0; node < cpusize; node++)