@@ -5,6 +5,8 @@
# include "misc.h"
# include "parameters.h"
# include <set>
# include <iostream>
# include <vector>
int Parallel : : partition1 ( int & nx , int split_size , int min_width , int cpusize , int shape ) // special for 1 diemnsion
{
@@ -501,6 +503,444 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
return BlL ;
}
MyList < Block > * Parallel : : distribute_new ( MyList < Patch > * PatchLIST , int cpusize , int ingfsi , int fngfsi ,
bool periodic , int nodes )
{
# ifdef USE_GPU_DIVIDE
double cpu_part , gpu_part ;
map < string , double > : : iterator iter ;
iter = parameters : : dou_par . find ( " cpu part " ) ;
if ( iter ! = parameters : : dou_par . end ( ) )
{
cpu_part = iter - > second ;
}
else
{
int myrank ;
MPI_Comm_rank ( MPI_COMM_WORLD , & myrank ) ;
// read parameter from file
const int LEN = 256 ;
char pline [ LEN ] ;
string str , sgrp , skey , sval ;
int sind ;
char pname [ 50 ] ;
{
map < string , string > : : iterator iter = parameters : : str_par . find ( " inputpar " ) ;
if ( iter ! = parameters : : str_par . end ( ) )
{
strcpy ( pname , ( iter - > second ) . c_str ( ) ) ;
}
else
{
cout < < " Error inputpar " < < endl ;
exit ( 0 ) ;
}
}
ifstream inf ( pname , ifstream : : in ) ;
if ( ! inf . good ( ) & & myrank = = 0 )
{
cout < < " Can not open parameter file " < < pname < < endl ;
MPI_Abort ( MPI_COMM_WORLD , 1 ) ;
}
for ( int i = 1 ; inf . good ( ) ; i + + )
{
inf . getline ( pline , LEN ) ;
str = pline ;
int status = misc : : parse_parts ( str , sgrp , skey , sval , sind ) ;
if ( status = = - 1 )
{
cout < < " error reading parameter file " < < pname < < " in line " < < i < < endl ;
MPI_Abort ( MPI_COMM_WORLD , 1 ) ;
}
else if ( status = = 0 )
continue ;
if ( sgrp = = " ABE " )
{
if ( skey = = " cpu part " )
cpu_part = atof ( sval . c_str ( ) ) ;
}
}
inf . close ( ) ;
parameters : : dou_par . insert ( map < string , double > : : value_type ( " cpu part " , cpu_part ) ) ;
}
iter = parameters : : dou_par . find ( " gpu part " ) ;
if ( iter ! = parameters : : dou_par . end ( ) )
{
gpu_part = iter - > second ;
}
else
{
int myrank ;
MPI_Comm_rank ( MPI_COMM_WORLD , & myrank ) ;
// read parameter from file
const int LEN = 256 ;
char pline [ LEN ] ;
string str , sgrp , skey , sval ;
int sind ;
char pname [ 50 ] ;
{
map < string , string > : : iterator iter = parameters : : str_par . find ( " inputpar " ) ;
if ( iter ! = parameters : : str_par . end ( ) )
{
strcpy ( pname , ( iter - > second ) . c_str ( ) ) ;
}
else
{
cout < < " Error inputpar " < < endl ;
exit ( 0 ) ;
}
}
ifstream inf ( pname , ifstream : : in ) ;
if ( ! inf . good ( ) & & myrank = = 0 )
{
cout < < " Can not open parameter file " < < pname < < endl ;
MPI_Abort ( MPI_COMM_WORLD , 1 ) ;
}
for ( int i = 1 ; inf . good ( ) ; i + + )
{
inf . getline ( pline , LEN ) ;
str = pline ;
int status = misc : : parse_parts ( str , sgrp , skey , sval , sind ) ;
if ( status = = - 1 )
{
cout < < " error reading parameter file " < < pname < < " in line " < < i < < endl ;
MPI_Abort ( MPI_COMM_WORLD , 1 ) ;
}
else if ( status = = 0 )
continue ;
if ( sgrp = = " ABE " )
{
if ( skey = = " gpu part " )
gpu_part = atof ( sval . c_str ( ) ) ;
}
}
inf . close ( ) ;
parameters : : dou_par . insert ( map < string , double > : : value_type ( " gpu part " , gpu_part ) ) ;
}
if ( nodes = = 0 )
nodes = cpusize / 2 ;
# else
if ( nodes = = 0 )
nodes = cpusize ;
# endif
if ( dim ! = 3 )
{
cout < < " distrivute: now we only support 3-dimension " < < endl ;
MPI_Abort ( MPI_COMM_WORLD , 1 ) ;
}
MyList < Block > * BlL = 0 ;
int split_size , min_size , block_size = 0 ;
int min_width = 2 * Mymax ( ghost_width , buffer_width ) ;
int nxyz [ dim ] , mmin_width [ dim ] , min_shape [ dim ] ;
MyList < Patch > * PLi = PatchLIST ;
for ( int i = 0 ; i < dim ; i + + )
min_shape [ i ] = PLi - > data - > shape [ i ] ;
int lev = PLi - > data - > lev ;
PLi = PLi - > next ;
while ( PLi )
{
Patch * PP = PLi - > data ;
for ( int i = 0 ; i < dim ; i + + )
min_shape [ i ] = Mymin ( min_shape [ i ] , PP - > shape [ i ] ) ;
if ( lev ! = PLi - > data - > lev )
cout < < " Parallel::distribute CAUSTION: meet Patches for different level: " < < lev < < " and " < < PLi - > data - > lev < < endl ;
PLi = PLi - > next ;
}
for ( int i = 0 ; i < dim ; i + + )
mmin_width [ i ] = Mymin ( min_width , min_shape [ i ] ) ;
min_size = mmin_width [ 0 ] ;
for ( int i = 1 ; i < dim ; i + + )
min_size = min_size * mmin_width [ i ] ;
PLi = PatchLIST ;
while ( PLi )
{
Patch * PP = PLi - > data ;
// PP->checkPatch(true);
int bs = PP - > shape [ 0 ] ;
for ( int i = 1 ; i < dim ; i + + )
bs = bs * PP - > shape [ i ] ;
block_size = block_size + bs ;
PLi = PLi - > next ;
}
split_size = Mymax ( min_size , block_size / nodes ) ;
split_size = Mymax ( 1 , split_size ) ;
int n_rank = 0 ;
PLi = PatchLIST ;
int reacpu = 0 ;
int current_block_id = 0 ;
while ( PLi ) {
Block * ng0 , * ng ;
bool first_block_in_patch = true ;
Patch * PP = PLi - > data ;
reacpu + = partition3 ( nxyz , split_size , mmin_width , nodes , PP - > shape ) ;
for ( int i = 0 ; i < nxyz [ 0 ] ; i + + )
for ( int j = 0 ; j < nxyz [ 1 ] ; j + + )
for ( int k = 0 ; k < nxyz [ 2 ] ; k + + )
{
// --- 1. 定义局部变量 ---
int ibbox_here [ 6 ] , shape_here [ 3 ] ;
double bbox_here [ 6 ] , dd ;
Block * current_ng_start = nullptr ; // 本次循环产生的第一个(或唯一一个)块
ibbox_here [ 0 ] = ( PP - > shape [ 0 ] * i ) / nxyz [ 0 ] ;
ibbox_here [ 3 ] = ( PP - > shape [ 0 ] * ( i + 1 ) ) / nxyz [ 0 ] - 1 ;
ibbox_here [ 1 ] = ( PP - > shape [ 1 ] * j ) / nxyz [ 1 ] ;
ibbox_here [ 4 ] = ( PP - > shape [ 1 ] * ( j + 1 ) ) / nxyz [ 1 ] - 1 ;
ibbox_here [ 2 ] = ( PP - > shape [ 2 ] * k ) / nxyz [ 2 ] ;
ibbox_here [ 5 ] = ( PP - > shape [ 2 ] * ( k + 1 ) ) / nxyz [ 2 ] - 1 ;
int core_blocks [ ] = { 27 , 28 , 35 , 36 } ; //后续改为传入数组
if ( current_block_id = = core_blocks [ 0 ] | | current_block_id = = core_blocks [ 1 ] | |
current_block_id = = core_blocks [ 2 ] | | current_block_id = = core_blocks [ 3 ] )
{
int aim_block [ 4 ] ;
switch ( current_block_id )
{
case 27 :
aim_block [ 0 ] = 24 ;
aim_block [ 1 ] = 25 ;
aim_block [ 2 ] = 26 ;
aim_block [ 3 ] = 27 ;
break ;
case 28 :
aim_block [ 0 ] = 28 ;
aim_block [ 1 ] = 29 ;
aim_block [ 2 ] = 30 ;
aim_block [ 3 ] = 23 ;
break ;
case 35 :
aim_block [ 0 ] = 40 ;
aim_block [ 1 ] = 33 ;
aim_block [ 2 ] = 34 ;
aim_block [ 3 ] = 35 ;
break ;
case 36 :
aim_block [ 0 ] = 36 ;
aim_block [ 1 ] = 37 ;
aim_block [ 2 ] = 38 ;
aim_block [ 3 ] = 39 ;
break ;
}
Block * split_first_block = nullptr ;
Block * split_last_block = nullptr ;
splitHotspotBlock ( BlL , dim , ibbox_here [ 0 ] , ibbox_here [ 3 ] , ibbox_here [ 1 ] , ibbox_here [ 4 ] , ibbox_here [ 2 ] , ibbox_here [ 5 ] ,
PP , aim_block [ 0 ] , aim_block [ 1 ] , aim_block [ 2 ] , aim_block [ 3 ] , ingfsi , fngfsi , periodic , split_first_block , split_last_block ) ;
current_ng_start = split_first_block ;
ng = split_last_block ;
}
else
{
// B. 普通块逻辑 (含 Ghost 扩张)
if ( periodic ) {
for ( int d = 0 ; d < 3 ; d + + ) {
ibbox_here [ d ] - = ghost_width ;
ibbox_here [ d + 3 ] + = ghost_width ;
}
} else {
ibbox_here [ 0 ] = Mymax ( 0 , ibbox_here [ 0 ] - ghost_width ) ;
ibbox_here [ 3 ] = Mymin ( PP - > shape [ 0 ] - 1 , ibbox_here [ 3 ] + ghost_width ) ;
ibbox_here [ 1 ] = Mymax ( 0 , ibbox_here [ 1 ] - ghost_width ) ;
ibbox_here [ 4 ] = Mymin ( PP - > shape [ 1 ] - 1 , ibbox_here [ 4 ] + ghost_width ) ;
ibbox_here [ 2 ] = Mymax ( 0 , ibbox_here [ 2 ] - ghost_width ) ;
ibbox_here [ 5 ] = Mymin ( PP - > shape [ 2 ] - 1 , ibbox_here [ 5 ] + ghost_width ) ;
}
for ( int d = 0 ; d < 3 ; d + + ) shape_here [ d ] = ibbox_here [ d + 3 ] - ibbox_here [ d ] + 1 ;
// 物理坐标计算 (根据你的宏定义 Cell/Vertex)
# ifdef Vertex
# ifdef Cell
# error Both Cell and Vertex are defined
# endif
// 0--4, 5--10
dd = ( PP - > bbox [ 3 ] - PP - > bbox [ 0 ] ) / ( PP - > shape [ 0 ] - 1 ) ;
bbox_here [ 0 ] = PP - > bbox [ 0 ] + ibbox_here [ 0 ] * dd ;
bbox_here [ 3 ] = PP - > bbox [ 0 ] + ibbox_here [ 3 ] * dd ;
dd = ( PP - > bbox [ 4 ] - PP - > bbox [ 1 ] ) / ( PP - > shape [ 1 ] - 1 ) ;
bbox_here [ 1 ] = PP - > bbox [ 1 ] + ibbox_here [ 1 ] * dd ;
bbox_here [ 4 ] = PP - > bbox [ 1 ] + ibbox_here [ 4 ] * dd ;
dd = ( PP - > bbox [ 5 ] - PP - > bbox [ 2 ] ) / ( PP - > shape [ 2 ] - 1 ) ;
bbox_here [ 2 ] = PP - > bbox [ 2 ] + ibbox_here [ 2 ] * dd ;
bbox_here [ 5 ] = PP - > bbox [ 2 ] + ibbox_here [ 5 ] * dd ;
# else
# ifdef Cell
// 0--5, 5--10
dd = ( PP - > bbox [ 3 ] - PP - > bbox [ 0 ] ) / PP - > shape [ 0 ] ;
bbox_here [ 0 ] = PP - > bbox [ 0 ] + ( ibbox_here [ 0 ] ) * dd ;
bbox_here [ 3 ] = PP - > bbox [ 0 ] + ( ibbox_here [ 3 ] + 1 ) * dd ;
dd = ( PP - > bbox [ 4 ] - PP - > bbox [ 1 ] ) / PP - > shape [ 1 ] ;
bbox_here [ 1 ] = PP - > bbox [ 1 ] + ( ibbox_here [ 1 ] ) * dd ;
bbox_here [ 4 ] = PP - > bbox [ 1 ] + ( ibbox_here [ 4 ] + 1 ) * dd ;
dd = ( PP - > bbox [ 5 ] - PP - > bbox [ 2 ] ) / PP - > shape [ 2 ] ;
bbox_here [ 2 ] = PP - > bbox [ 2 ] + ( ibbox_here [ 2 ] ) * dd ;
bbox_here [ 5 ] = PP - > bbox [ 2 ] + ( ibbox_here [ 5 ] + 1 ) * dd ;
# else
# error Not define Vertex nor Cell
# endif
# endif
//ng = createMappedBlock(BlL, dim, shape_here, bbox_here, current_block_id, ingfsi, fngfsi, PP->lev);
Block * ng = new Block ( dim , shape_here , bbox_here , current_block_id , ingfsi , fngfsi , PP - > lev ) ;
if ( BlL )
BlL - > insert ( ng ) ;
else
BlL = new MyList < Block > ( ng ) ;
current_ng_start = ng ;
}
// --- 3. 统一处理 Patch 起始 Block 指针 ---
if ( first_block_in_patch ) {
ng0 = current_ng_start ;
// 立即设置 PP->blb, 避免后续循环覆盖 ng0
MyList < Block > * Bp_start = BlL ;
while ( Bp_start & & Bp_start - > data ! = ng0 ) Bp_start = Bp_start - > next ;
PP - > blb = Bp_start ;
first_block_in_patch = false ;
}
current_block_id + + ;
}
// --- 4. 设置 Patch 结束 Block 指针 ---
MyList < Block > * Bp_end = BlL ;
while ( Bp_end & & Bp_end - > data ! = ng )
Bp_end = Bp_end - > next ;
PP - > ble = Bp_end ;
PLi = PLi - > next ;
first_block_in_patch = true ;
}
if ( reacpu < nodes * 2 / 3 )
{
int myrank ;
MPI_Comm_rank ( MPI_COMM_WORLD , & myrank ) ;
if ( myrank = = 0 )
cout < < " Parallel::distribute CAUSTION: level# " < < lev < < " uses essencially " < < reacpu < < " processors vs " < < nodes < < " nodes run, your scientific computation scale is not as large as you estimate. " < < endl ;
}
return BlL ;
}
/**
 * Split one piece of a Patch into four Blocks along the first index direction
 * and append them to the Block list.
 *
 * The index range [ib0_orig, ib3_orig] is cut at its quarter points; the
 * ranges in the other two directions are shared by all four sub-blocks. Each
 * sub-block is then expanded by ghost_width (clamped to the Patch extent
 * unless `periodic`), converted to physical coordinates and created with the
 * matching rank argument r_1 .. r_4.
 *
 * @param BlL                Block list head; allocated here when it is null.
 * @param _dim               dimension forwarded to the Block constructor.
 * @param ib0_orig,ib3_orig  inclusive index range in direction 0, without ghosts.
 * @param jb1_orig,jb4_orig  inclusive index range in direction 1, without ghosts.
 * @param kb2_orig,kb5_orig  inclusive index range in direction 2, without ghosts.
 * @param PP                 Patch the piece belongs to (shape, bbox and lev are read).
 * @param r_1,r_2,r_3,r_4    rank arguments for sub-blocks 1 to 4.
 * @param ingfsi,fngfsi      forwarded unchanged to the Block constructor.
 * @param periodic           when true the ghost expansion is not clamped.
 * @param split_first_block  receives the first sub-block created.
 * @param split_last_block   receives the fourth sub-block created.
 * @return the first sub-block (same pointer as split_first_block).
 *
 * NOTE(review): when the direction-0 range holds fewer than 4 points some
 * quarter segments are empty before ghost expansion -- confirm callers never
 * pass such a narrow piece.
 */
Block *Parallel::splitHotspotBlock(MyList<Block> *&BlL, int _dim,
                                   int ib0_orig, int ib3_orig,
                                   int jb1_orig, int jb4_orig,
                                   int kb2_orig, int kb5_orig,
                                   Patch *PP, int r_1, int r_2, int r_3, int r_4,
                                   int ingfsi, int fngfsi, bool periodic,
                                   Block *&split_first_block, Block *&split_last_block)
{
    // 1. Quarter the direction-0 index range.
    //    segment 1: [ib0_orig, m1]
    //    segment 2: [m1 + 1, m2]
    //    segment 3: [m2 + 1, m3]
    //    segment 4: [m3 + 1, ib3_orig]
    const int total_len = ib3_orig - ib0_orig + 1;
    const int m1 = ib0_orig + total_len / 4 - 1;
    const int m2 = ib0_orig + total_len / 2 - 1;
    const int m3 = ib0_orig + (3 * total_len) / 4 - 1;

    // Layout per row: {lower i, lower j, lower k, upper i, upper j, upper k}.
    const int indices[4][6] = {
        {ib0_orig, jb1_orig, kb2_orig, m1, jb4_orig, kb5_orig},
        {m1 + 1, jb1_orig, kb2_orig, m2, jb4_orig, kb5_orig},
        {m2 + 1, jb1_orig, kb2_orig, m3, jb4_orig, kb5_orig},
        {m3 + 1, jb1_orig, kb2_orig, ib3_orig, jb4_orig, kb5_orig}};
    const int target_ranks[4] = {r_1, r_2, r_3, r_4};

    // 2. Ghost expansion, physical coordinates, Block creation and list insertion.
    auto createSubBlock = [&](const int *ib_raw, int target_rank) -> Block *
    {
        int ib_final[6];
        int sh_here[3];
        double bb_here[6];

        for (int d = 0; d < 3; d++)
        {
            // --- A: ghost expansion ---
            if (periodic)
            {
                ib_final[d] = ib_raw[d] - ghost_width;
                ib_final[d + 3] = ib_raw[d + 3] + ghost_width;
            }
            else
            {
                ib_final[d] = Mymax(0, ib_raw[d] - ghost_width);
                ib_final[d + 3] = Mymin(PP->shape[d] - 1, ib_raw[d + 3] + ghost_width);
            }
            sh_here[d] = ib_final[d + 3] - ib_final[d] + 1;

            // --- B: physical coordinates ---
            // Follow the same Vertex / Cell convention as the regular blocks
            // built in distribute_new, so both kinds of Block agree in a
            // Vertex build as well.
#ifdef Vertex
            const double dd = (PP->bbox[d + 3] - PP->bbox[d]) / (PP->shape[d] - 1);
            bb_here[d] = PP->bbox[d] + ib_final[d] * dd;
            bb_here[d + 3] = PP->bbox[d] + ib_final[d + 3] * dd;
#else
            const double dd = (PP->bbox[d + 3] - PP->bbox[d]) / PP->shape[d];
            bb_here[d] = PP->bbox[d] + ib_final[d] * dd;
            bb_here[d + 3] = PP->bbox[d] + (ib_final[d + 3] + 1) * dd;
#endif
        }

        Block *Bg = new Block(_dim, sh_here, bb_here, target_rank, ingfsi, fngfsi, PP->lev);
        if (BlL)
            BlL->insert(Bg);
        else
            BlL = new MyList<Block>(Bg);
        return Bg;
    };

    // 3. Create the four sub-blocks in order; only the first and last are reported back.
    Block *created[4];
    for (int s = 0; s < 4; s++)
        created[s] = createSubBlock(indices[s], target_ranks[s]);

    split_first_block = created[0];
    split_last_block = created[3];
    return split_first_block;
}
MyList < Block > * Parallel : : distribute_hard ( MyList < Patch > * PatchLIST , int cpusize , int ingfsi , int fngfsi ,
bool periodic , int nodes )
{