Compare commits
4 Commits
yx-prolong
...
hxh-omp
| Author | SHA1 | Date | |
|---|---|---|---|
| e11363e06e | |||
| f70e90f694 | |||
|
|
75dd5353b0 | ||
|
|
23a82d063b |
@@ -141,12 +141,26 @@ void fdderivs(const int ex[3],
|
|||||||
const int j4_hi = ex2 - 3;
|
const int j4_hi = ex2 - 3;
|
||||||
const int k4_hi = ex3 - 3;
|
const int k4_hi = ex3 - 3;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Strategy A:
|
||||||
|
* Avoid redundant work in overlap of 2nd/4th-order regions.
|
||||||
|
* Only compute 2nd-order on shell points that are NOT overwritten by
|
||||||
|
* the 4th-order pass.
|
||||||
|
*/
|
||||||
|
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
|
||||||
|
|
||||||
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||||
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||||
const int kF = k0 + 1;
|
const int kF = k0 + 1;
|
||||||
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||||
const int jF = j0 + 1;
|
const int jF = j0 + 1;
|
||||||
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||||
|
if (has4 &&
|
||||||
|
i0 >= i4_lo && i0 <= i4_hi &&
|
||||||
|
j0 >= j4_lo && j0 <= j4_hi &&
|
||||||
|
k0 >= k4_lo && k0 <= k4_hi) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
const int iF = i0 + 1;
|
const int iF = i0 + 1;
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
@@ -193,7 +207,7 @@ void fdderivs(const int ex[3],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
if (has4) {
|
||||||
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||||
const int kF = k0 + 1;
|
const int kF = k0 + 1;
|
||||||
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||||
|
|||||||
@@ -1932,30 +1932,34 @@
|
|||||||
real*8, dimension(1:3) :: base
|
real*8, dimension(1:3) :: base
|
||||||
integer,dimension(3) :: lbc,ubc,lbf,ubf,lbp,ubp,lbpc,ubpc
|
integer,dimension(3) :: lbc,ubc,lbf,ubf,lbp,ubp,lbpc,ubpc
|
||||||
! when if=1 -> ic=0, this is different to vertex center grid
|
! when if=1 -> ic=0, this is different to vertex center grid
|
||||||
real*8, dimension(-2:extc(1),-2:extc(2),-2:extc(3)) :: funcc
|
real*8, dimension(-2:extc(1),-2:extc(2),-2:extc(3)) :: funcc
|
||||||
integer,dimension(3) :: cxI
|
integer,dimension(3) :: cxI
|
||||||
integer :: i,j,k,ii,jj,kk,px,py,pz
|
integer :: i,j,k,ii,jj,kk,px,py,pz
|
||||||
real*8, dimension(6,6) :: tmp2
|
real*8, dimension(6,6) :: tmp2
|
||||||
real*8, dimension(6) :: tmp1
|
real*8, dimension(6) :: tmp1
|
||||||
integer, dimension(extf(1)) :: cix
|
integer, dimension(extf(1)) :: cix
|
||||||
integer, dimension(extf(2)) :: ciy
|
integer, dimension(extf(2)) :: ciy
|
||||||
integer, dimension(extf(3)) :: ciz
|
integer, dimension(extf(3)) :: ciz
|
||||||
integer, dimension(extf(1)) :: pix
|
integer, dimension(extf(1)) :: pix
|
||||||
integer, dimension(extf(2)) :: piy
|
integer, dimension(extf(2)) :: piy
|
||||||
integer, dimension(extf(3)) :: piz
|
integer, dimension(extf(3)) :: piz
|
||||||
|
|
||||||
real*8, parameter :: C1=7.7d1/8.192d3,C2=-6.93d2/8.192d3,C3=3.465d3/4.096d3
|
real*8, parameter :: C1=7.7d1/8.192d3,C2=-6.93d2/8.192d3,C3=3.465d3/4.096d3
|
||||||
real*8, parameter :: C6=6.3d1/8.192d3,C5=-4.95d2/8.192d3,C4=1.155d3/4.096d3
|
real*8, parameter :: C6=6.3d1/8.192d3,C5=-4.95d2/8.192d3,C4=1.155d3/4.096d3
|
||||||
real*8, dimension(6,2), parameter :: WC = reshape((/&
|
real*8, dimension(6,2), parameter :: WC = reshape((/&
|
||||||
C1,C2,C3,C4,C5,C6,&
|
C1,C2,C3,C4,C5,C6,&
|
||||||
C6,C5,C4,C3,C2,C1/), (/6,2/))
|
C6,C5,C4,C3,C2,C1/), (/6,2/))
|
||||||
|
|
||||||
integer::imini,imaxi,jmini,jmaxi,kmini,kmaxi
|
integer::imini,imaxi,jmini,jmaxi,kmini,kmaxi
|
||||||
integer::imino,imaxo,jmino,jmaxo,kmino,kmaxo
|
integer::imino,imaxo,jmino,jmaxo,kmino,kmaxo
|
||||||
integer::maxcx,maxcy,maxcz
|
integer::maxcx,maxcy,maxcz
|
||||||
|
|
||||||
real*8,dimension(3) :: CD,FD
|
real*8,dimension(3) :: CD,FD
|
||||||
|
real*8 :: tmp_yz(extc(1), 6) ! 存储整条 X 线上 6 个 Y 轴偏置的 Z 向插值结果
|
||||||
|
real*8 :: tmp_xyz_line(extc(1)) ! 存储整条 X 线上完成 Y 向融合后的结果
|
||||||
|
real*8 :: v1, v2, v3, v4, v5, v6
|
||||||
|
integer :: ic, jc, kc, ix_offset,ix,iy,iz
|
||||||
|
real*8 :: res_line
|
||||||
if(wei.ne.3)then
|
if(wei.ne.3)then
|
||||||
write(*,*)"prolongrestrict.f90::prolong3: this routine only surport 3 dimension"
|
write(*,*)"prolongrestrict.f90::prolong3: this routine only surport 3 dimension"
|
||||||
write(*,*)"dim = ",wei
|
write(*,*)"dim = ",wei
|
||||||
@@ -2013,10 +2017,10 @@
|
|||||||
kmini=lbpc(3)-lbc(3) + 1
|
kmini=lbpc(3)-lbc(3) + 1
|
||||||
kmaxi=ubpc(3)-lbc(3) + 1
|
kmaxi=ubpc(3)-lbc(3) + 1
|
||||||
|
|
||||||
if(imino.lt.1.or.jmino.lt.1.or.kmino.lt.1.or.&
|
if(imino.lt.1.or.jmino.lt.1.or.kmino.lt.1.or.&
|
||||||
imini.lt.1.or.jmini.lt.1.or.kmini.lt.1.or.&
|
imini.lt.1.or.jmini.lt.1.or.kmini.lt.1.or.&
|
||||||
imaxo.gt.extf(1).or.jmaxo.gt.extf(2).or.kmaxo.gt.extf(3).or.&
|
imaxo.gt.extf(1).or.jmaxo.gt.extf(2).or.kmaxo.gt.extf(3).or.&
|
||||||
imaxi.gt.extc(1)-2.or.jmaxi.gt.extc(2)-2.or.kmaxi.gt.extc(3)-2)then
|
imaxi.gt.extc(1)-2.or.jmaxi.gt.extc(2)-2.or.kmaxi.gt.extc(3)-2)then
|
||||||
write(*,*)"error in prolongation for"
|
write(*,*)"error in prolongation for"
|
||||||
write(*,*)"from"
|
write(*,*)"from"
|
||||||
write(*,*)llbc,uubc
|
write(*,*)llbc,uubc
|
||||||
@@ -2027,160 +2031,120 @@
|
|||||||
write(*,*)"want"
|
write(*,*)"want"
|
||||||
write(*,*)llbp,uubp
|
write(*,*)llbp,uubp
|
||||||
write(*,*)lbp,ubp,lbpc,ubpc
|
write(*,*)lbp,ubp,lbpc,ubpc
|
||||||
return
|
return
|
||||||
endif
|
endif
|
||||||
|
|
||||||
do i = imino,imaxo
|
do i = imino,imaxo
|
||||||
ii = i + lbf(1) - 1
|
ii = i + lbf(1) - 1
|
||||||
cix(i) = ii/2 - lbc(1) + 1
|
cix(i) = ii/2 - lbc(1) + 1
|
||||||
if(ii/2*2 == ii)then
|
if(ii/2*2 == ii)then
|
||||||
pix(i) = 1
|
pix(i) = 1
|
||||||
else
|
else
|
||||||
pix(i) = 2
|
pix(i) = 2
|
||||||
endif
|
endif
|
||||||
enddo
|
|
||||||
do j = jmino,jmaxo
|
|
||||||
jj = j + lbf(2) - 1
|
|
||||||
ciy(j) = jj/2 - lbc(2) + 1
|
|
||||||
if(jj/2*2 == jj)then
|
|
||||||
piy(j) = 1
|
|
||||||
else
|
|
||||||
piy(j) = 2
|
|
||||||
endif
|
|
||||||
enddo
|
|
||||||
do k = kmino,kmaxo
|
|
||||||
kk = k + lbf(3) - 1
|
|
||||||
ciz(k) = kk/2 - lbc(3) + 1
|
|
||||||
if(kk/2*2 == kk)then
|
|
||||||
piz(k) = 1
|
|
||||||
else
|
|
||||||
piz(k) = 2
|
|
||||||
endif
|
|
||||||
enddo
|
|
||||||
|
|
||||||
maxcx = maxval(cix(imino:imaxo))
|
|
||||||
maxcy = maxval(ciy(jmino:jmaxo))
|
|
||||||
maxcz = maxval(ciz(kmino:kmaxo))
|
|
||||||
if(maxcx+3 > extc(1) .or. maxcy+3 > extc(2) .or. maxcz+3 > extc(3))then
|
|
||||||
write(*,*)"error in prolong"
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
|
|
||||||
call symmetry_bd(3,extc,func,funcc,SoA)
|
|
||||||
|
|
||||||
!~~~~~~> prolongation start...
|
|
||||||
do k = kmino,kmaxo
|
|
||||||
do j = jmino,jmaxo
|
|
||||||
do i = imino,imaxo
|
|
||||||
cxI(1) = cix(i)
|
|
||||||
cxI(2) = ciy(j)
|
|
||||||
cxI(3) = ciz(k)
|
|
||||||
px = pix(i)
|
|
||||||
py = piy(j)
|
|
||||||
pz = piz(k)
|
|
||||||
#if 0
|
|
||||||
if(ii/2*2==ii)then
|
|
||||||
if(jj/2*2==jj)then
|
|
||||||
if(kk/2*2==kk)then
|
|
||||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
|
||||||
else
|
|
||||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
if(kk/2*2==kk)then
|
|
||||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
|
||||||
else
|
|
||||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
if(jj/2*2==jj)then
|
|
||||||
if(kk/2*2==kk)then
|
|
||||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
|
||||||
else
|
|
||||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
if(kk/2*2==kk)then
|
|
||||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
|
||||||
else
|
|
||||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
#else
|
|
||||||
tmp2= WC(1,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
WC(2,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
WC(3,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
WC(4,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
WC(5,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
WC(6,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
|
|
||||||
tmp1= WC(1,py)*tmp2(:,1)+WC(2,py)*tmp2(:,2)+WC(3,py)*tmp2(:,3)+&
|
|
||||||
WC(4,py)*tmp2(:,4)+WC(5,py)*tmp2(:,5)+WC(6,py)*tmp2(:,6)
|
|
||||||
|
|
||||||
funf(i,j,k)= WC(1,px)*tmp1(1)+WC(2,px)*tmp1(2)+WC(3,px)*tmp1(3)+&
|
|
||||||
WC(4,px)*tmp1(4)+WC(5,px)*tmp1(5)+WC(6,px)*tmp1(6)
|
|
||||||
#endif
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
enddo
|
enddo
|
||||||
|
do j = jmino,jmaxo
|
||||||
|
jj = j + lbf(2) - 1
|
||||||
|
ciy(j) = jj/2 - lbc(2) + 1
|
||||||
|
if(jj/2*2 == jj)then
|
||||||
|
piy(j) = 1
|
||||||
|
else
|
||||||
|
piy(j) = 2
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
do k = kmino,kmaxo
|
||||||
|
kk = k + lbf(3) - 1
|
||||||
|
ciz(k) = kk/2 - lbc(3) + 1
|
||||||
|
if(kk/2*2 == kk)then
|
||||||
|
piz(k) = 1
|
||||||
|
else
|
||||||
|
piz(k) = 2
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
|
||||||
|
maxcx = maxval(cix(imino:imaxo))
|
||||||
|
maxcy = maxval(ciy(jmino:jmaxo))
|
||||||
|
maxcz = maxval(ciz(kmino:kmaxo))
|
||||||
|
if(maxcx+3 > extc(1) .or. maxcy+3 > extc(2) .or. maxcz+3 > extc(3))then
|
||||||
|
write(*,*)"error in prolong"
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
call symmetry_bd(3,extc,func,funcc,SoA)
|
||||||
|
|
||||||
|
!~~~~~~> prolongation start...
|
||||||
|
do k = kmino, kmaxo
|
||||||
|
pz = piz(k)
|
||||||
|
kc = ciz(k)
|
||||||
|
|
||||||
|
do j = jmino, jmaxo
|
||||||
|
py = piy(j)
|
||||||
|
jc = ciy(j)
|
||||||
|
|
||||||
|
! --- 步骤 1 & 2 融合:分段处理 X 轴,提升 Cache 命中率 ---
|
||||||
|
! 我们将 ii 循环逻辑重组,减少对 funcc 的跨行重复访问
|
||||||
|
do ii = 1, extc(1)
|
||||||
|
! 1. 先做 Z 方向的 6 条线插值(针对当前的 ii 和当前的 6 个 iy)
|
||||||
|
! 我们直接在这里把 Y 方向的加权也做了,省去 tmp_yz 数组
|
||||||
|
! 这样 funcc 的数据读进来后立即完成所有维度的贡献,不再写回内存
|
||||||
|
|
||||||
|
res_line = 0.0d0
|
||||||
|
do jj = 1, 6
|
||||||
|
iy = jc - 3 + jj
|
||||||
|
! 这一行代码是核心:一次性完成 Z 插值并加上 Y 的权重
|
||||||
|
! 编译器会把 WC(jj, py) 存在寄存器里
|
||||||
|
res_line = res_line + WC(jj, py) * ( &
|
||||||
|
WC(1, pz) * funcc(ii, iy, kc-2) + &
|
||||||
|
WC(2, pz) * funcc(ii, iy, kc-1) + &
|
||||||
|
WC(3, pz) * funcc(ii, iy, kc ) + &
|
||||||
|
WC(4, pz) * funcc(ii, iy, kc+1) + &
|
||||||
|
WC(5, pz) * funcc(ii, iy, kc+2) + &
|
||||||
|
WC(6, pz) * funcc(ii, iy, kc+3) )
|
||||||
|
end do
|
||||||
|
tmp_xyz_line(ii) = res_line
|
||||||
|
end do
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
! 1. 【降维:Z 向】对当前 (j,k) 相关的 6 条 Y 偏置线进行 Z 向插值
|
||||||
|
! 结果存入 tmp_yz(x_index, y_offset)
|
||||||
|
do jj = 1, 6
|
||||||
|
iy = jc - 3 + jj
|
||||||
|
do ii = 1, extc(1)
|
||||||
|
tmp_yz(ii, jj) = WC(1,pz)*funcc(ii, iy, kc-2) + &
|
||||||
|
WC(2,pz)*funcc(ii, iy, kc-1) + &
|
||||||
|
WC(3,pz)*funcc(ii, iy, kc ) + &
|
||||||
|
WC(4,pz)*funcc(ii, iy, kc+1) + &
|
||||||
|
WC(5,pz)*funcc(ii, iy, kc+2) + &
|
||||||
|
WC(6,pz)*funcc(ii, iy, kc+3)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
|
||||||
|
! 2. 【降维:Y 向】将 Z 向结果合并,得到整条 X 轴线上的 Y-Z 融合值
|
||||||
|
do ii = 1, extc(1)
|
||||||
|
tmp_xyz_line(ii) = WC(1,py)*tmp_yz(ii, 1) + WC(2,py)*tmp_yz(ii, 2) + &
|
||||||
|
WC(3,py)*tmp_yz(ii, 3) + WC(4,py)*tmp_yz(ii, 4) + &
|
||||||
|
WC(5,py)*tmp_yz(ii, 5) + WC(6,py)*tmp_yz(ii, 6)
|
||||||
|
end do
|
||||||
|
#endif
|
||||||
|
! 3. 【降维:X 向】最后在最内层只处理 X 方向的 6 点加权
|
||||||
|
! 此时每个点的计算量从原来的 200+ 次乘法降到了仅 6 次
|
||||||
|
do i = imino, imaxo
|
||||||
|
px = pix(i)
|
||||||
|
ic = cix(i)
|
||||||
|
|
||||||
|
! 直接从预计算好的 line 中读取连续的 6 个点
|
||||||
|
! ic-2 到 ic+3 对应原始 6 点算子
|
||||||
|
funf(i,j,k) = WC(1,px)*tmp_xyz_line(ic-2) + &
|
||||||
|
WC(2,px)*tmp_xyz_line(ic-1) + &
|
||||||
|
WC(3,px)*tmp_xyz_line(ic ) + &
|
||||||
|
WC(4,px)*tmp_xyz_line(ic+1) + &
|
||||||
|
WC(5,px)*tmp_xyz_line(ic+2) + &
|
||||||
|
WC(6,px)*tmp_xyz_line(ic+3)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user