对prolong3做访存优化
This commit is contained in:
@@ -1912,7 +1912,7 @@
|
|||||||
! 63/8192*f_6 - 495/8192*f_5 + 1155/4096*f_4
|
! 63/8192*f_6 - 495/8192*f_5 + 1155/4096*f_4
|
||||||
!--------------------------------------------------------------------------
|
!--------------------------------------------------------------------------
|
||||||
#if 1
|
#if 1
|
||||||
subroutine prolong3(wei,llbc,uubc,extc,func,&
|
subroutine prolong3(wei,llbc,uubc,extc,func,&
|
||||||
llbf,uubf,extf,funf,&
|
llbf,uubf,extf,funf,&
|
||||||
llbp,uubp,SoA,Symmetry)
|
llbp,uubp,SoA,Symmetry)
|
||||||
implicit none
|
implicit none
|
||||||
@@ -1932,29 +1932,37 @@
|
|||||||
real*8, dimension(1:3) :: base
|
real*8, dimension(1:3) :: base
|
||||||
integer,dimension(3) :: lbc,ubc,lbf,ubf,lbp,ubp,lbpc,ubpc
|
integer,dimension(3) :: lbc,ubc,lbf,ubf,lbp,ubp,lbpc,ubpc
|
||||||
! when if=1 -> ic=0, this is different to vertex center grid
|
! when if=1 -> ic=0, this is different to vertex center grid
|
||||||
real*8, dimension(-2:extc(1),-2:extc(2),-2:extc(3)) :: funcc
|
real*8, dimension(-2:extc(1),-2:extc(2),-2:extc(3)) :: funcc
|
||||||
integer,dimension(3) :: cxI
|
integer,dimension(3) :: cxI
|
||||||
integer :: i,j,k,ii,jj,kk,px,py,pz
|
integer :: i,j,k,ii,jj,kk,px,py,pz
|
||||||
real*8, dimension(6,6) :: tmp2
|
real*8, dimension(6,6) :: tmp2
|
||||||
real*8, dimension(6) :: tmp1
|
real*8, dimension(6) :: tmp1
|
||||||
integer, dimension(extf(1)) :: cix
|
integer, dimension(extf(1)) :: cix
|
||||||
integer, dimension(extf(2)) :: ciy
|
integer, dimension(extf(2)) :: ciy
|
||||||
integer, dimension(extf(3)) :: ciz
|
integer, dimension(extf(3)) :: ciz
|
||||||
integer, dimension(extf(1)) :: pix
|
integer, dimension(extf(1)) :: pix
|
||||||
integer, dimension(extf(2)) :: piy
|
integer, dimension(extf(2)) :: piy
|
||||||
integer, dimension(extf(3)) :: piz
|
integer, dimension(extf(3)) :: piz
|
||||||
|
|
||||||
real*8, parameter :: C1=7.7d1/8.192d3,C2=-6.93d2/8.192d3,C3=3.465d3/4.096d3
|
! ~~~> 在子程序开头新增声明 <~~~
|
||||||
real*8, parameter :: C6=6.3d1/8.192d3,C5=-4.95d2/8.192d3,C4=1.155d3/4.096d3
|
real*8 :: tmp_yz(extc(1), 6) ! 存储整条 X 线上 6 个 Y 轴偏置的 Z 向插值结果
|
||||||
real*8, dimension(6,2), parameter :: WC = reshape((/&
|
real*8 :: tmp_xyz_line(extc(1)) ! 存储整条 X 线上完成 Y 向融合后的结果
|
||||||
C1,C2,C3,C4,C5,C6,&
|
real*8 :: v1, v2, v3, v4, v5, v6
|
||||||
C6,C5,C4,C3,C2,C1/), (/6,2/))
|
integer :: ic, jc, kc, iy, ix_offset
|
||||||
|
real*8 :: v_m2, v_m1, v_0, v_p1, v_p2, v_p3
|
||||||
integer::imini,imaxi,jmini,jmaxi,kmini,kmaxi
|
real*8 :: res_line
|
||||||
integer::imino,imaxo,jmino,jmaxo,kmino,kmaxo
|
! ~~~> 声明end <~~~
|
||||||
integer::maxcx,maxcy,maxcz
|
real*8, parameter :: C1=7.7d1/8.192d3,C2=-6.93d2/8.192d3,C3=3.465d3/4.096d3
|
||||||
|
real*8, parameter :: C6=6.3d1/8.192d3,C5=-4.95d2/8.192d3,C4=1.155d3/4.096d3
|
||||||
real*8,dimension(3) :: CD,FD
|
real*8, dimension(6,2), parameter :: WC = reshape((/&
|
||||||
|
C1,C2,C3,C4,C5,C6,&
|
||||||
|
C6,C5,C4,C3,C2,C1/), (/6,2/))
|
||||||
|
|
||||||
|
integer::imini,imaxi,jmini,jmaxi,kmini,kmaxi
|
||||||
|
integer::imino,imaxo,jmino,jmaxo,kmino,kmaxo
|
||||||
|
integer::maxcx,maxcy,maxcz
|
||||||
|
|
||||||
|
real*8,dimension(3) :: CD,FD
|
||||||
|
|
||||||
if(wei.ne.3)then
|
if(wei.ne.3)then
|
||||||
write(*,*)"prolongrestrict.f90::prolong3: this routine only surport 3 dimension"
|
write(*,*)"prolongrestrict.f90::prolong3: this routine only surport 3 dimension"
|
||||||
@@ -2013,10 +2021,10 @@
|
|||||||
kmini=lbpc(3)-lbc(3) + 1
|
kmini=lbpc(3)-lbc(3) + 1
|
||||||
kmaxi=ubpc(3)-lbc(3) + 1
|
kmaxi=ubpc(3)-lbc(3) + 1
|
||||||
|
|
||||||
if(imino.lt.1.or.jmino.lt.1.or.kmino.lt.1.or.&
|
if(imino.lt.1.or.jmino.lt.1.or.kmino.lt.1.or.&
|
||||||
imini.lt.1.or.jmini.lt.1.or.kmini.lt.1.or.&
|
imini.lt.1.or.jmini.lt.1.or.kmini.lt.1.or.&
|
||||||
imaxo.gt.extf(1).or.jmaxo.gt.extf(2).or.kmaxo.gt.extf(3).or.&
|
imaxo.gt.extf(1).or.jmaxo.gt.extf(2).or.kmaxo.gt.extf(3).or.&
|
||||||
imaxi.gt.extc(1)-2.or.jmaxi.gt.extc(2)-2.or.kmaxi.gt.extc(3)-2)then
|
imaxi.gt.extc(1)-2.or.jmaxi.gt.extc(2)-2.or.kmaxi.gt.extc(3)-2)then
|
||||||
write(*,*)"error in prolongation for"
|
write(*,*)"error in prolongation for"
|
||||||
write(*,*)"from"
|
write(*,*)"from"
|
||||||
write(*,*)llbc,uubc
|
write(*,*)llbc,uubc
|
||||||
@@ -2027,161 +2035,188 @@
|
|||||||
write(*,*)"want"
|
write(*,*)"want"
|
||||||
write(*,*)llbp,uubp
|
write(*,*)llbp,uubp
|
||||||
write(*,*)lbp,ubp,lbpc,ubpc
|
write(*,*)lbp,ubp,lbpc,ubpc
|
||||||
return
|
return
|
||||||
endif
|
endif
|
||||||
|
|
||||||
do i = imino,imaxo
|
do i = imino,imaxo
|
||||||
ii = i + lbf(1) - 1
|
ii = i + lbf(1) - 1
|
||||||
cix(i) = ii/2 - lbc(1) + 1
|
cix(i) = ii/2 - lbc(1) + 1
|
||||||
if(ii/2*2 == ii)then
|
if(ii/2*2 == ii)then
|
||||||
pix(i) = 1
|
pix(i) = 1
|
||||||
else
|
else
|
||||||
pix(i) = 2
|
pix(i) = 2
|
||||||
endif
|
endif
|
||||||
enddo
|
enddo
|
||||||
do j = jmino,jmaxo
|
do j = jmino,jmaxo
|
||||||
jj = j + lbf(2) - 1
|
jj = j + lbf(2) - 1
|
||||||
ciy(j) = jj/2 - lbc(2) + 1
|
ciy(j) = jj/2 - lbc(2) + 1
|
||||||
if(jj/2*2 == jj)then
|
if(jj/2*2 == jj)then
|
||||||
piy(j) = 1
|
piy(j) = 1
|
||||||
else
|
else
|
||||||
piy(j) = 2
|
piy(j) = 2
|
||||||
endif
|
endif
|
||||||
enddo
|
enddo
|
||||||
do k = kmino,kmaxo
|
do k = kmino,kmaxo
|
||||||
kk = k + lbf(3) - 1
|
kk = k + lbf(3) - 1
|
||||||
ciz(k) = kk/2 - lbc(3) + 1
|
ciz(k) = kk/2 - lbc(3) + 1
|
||||||
if(kk/2*2 == kk)then
|
if(kk/2*2 == kk)then
|
||||||
piz(k) = 1
|
piz(k) = 1
|
||||||
else
|
else
|
||||||
piz(k) = 2
|
piz(k) = 2
|
||||||
endif
|
endif
|
||||||
enddo
|
|
||||||
|
|
||||||
maxcx = maxval(cix(imino:imaxo))
|
|
||||||
maxcy = maxval(ciy(jmino:jmaxo))
|
|
||||||
maxcz = maxval(ciz(kmino:kmaxo))
|
|
||||||
if(maxcx+3 > extc(1) .or. maxcy+3 > extc(2) .or. maxcz+3 > extc(3))then
|
|
||||||
write(*,*)"error in prolong"
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
|
|
||||||
call symmetry_bd(3,extc,func,funcc,SoA)
|
|
||||||
|
|
||||||
!~~~~~~> prolongation start...
|
|
||||||
do k = kmino,kmaxo
|
|
||||||
do j = jmino,jmaxo
|
|
||||||
do i = imino,imaxo
|
|
||||||
cxI(1) = cix(i)
|
|
||||||
cxI(2) = ciy(j)
|
|
||||||
cxI(3) = ciz(k)
|
|
||||||
px = pix(i)
|
|
||||||
py = piy(j)
|
|
||||||
pz = piz(k)
|
|
||||||
#if 0
|
|
||||||
if(ii/2*2==ii)then
|
|
||||||
if(jj/2*2==jj)then
|
|
||||||
if(kk/2*2==kk)then
|
|
||||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
|
||||||
else
|
|
||||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
if(kk/2*2==kk)then
|
|
||||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
|
||||||
else
|
|
||||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C1*tmp1(1)+C2*tmp1(2)+C3*tmp1(3)+C4*tmp1(4)+C5*tmp1(5)+C6*tmp1(6)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
if(jj/2*2==jj)then
|
|
||||||
if(kk/2*2==kk)then
|
|
||||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
|
||||||
else
|
|
||||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C1*tmp2(:,1)+C2*tmp2(:,2)+C3*tmp2(:,3)+C4*tmp2(:,4)+C5*tmp2(:,5)+C6*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
if(kk/2*2==kk)then
|
|
||||||
tmp2= C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
|
||||||
else
|
|
||||||
tmp2= C6*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
C5*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
C4*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
C3*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
C2*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
C1*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
tmp1= C6*tmp2(:,1)+C5*tmp2(:,2)+C4*tmp2(:,3)+C3*tmp2(:,4)+C2*tmp2(:,5)+C1*tmp2(:,6)
|
|
||||||
funf(i,j,k)= C6*tmp1(1)+C5*tmp1(2)+C4*tmp1(3)+C3*tmp1(4)+C2*tmp1(5)+C1*tmp1(6)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
#else
|
|
||||||
tmp2= WC(1,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-2)+&
|
|
||||||
WC(2,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)-1)+&
|
|
||||||
WC(3,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3) )+&
|
|
||||||
WC(4,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+1)+&
|
|
||||||
WC(5,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+2)+&
|
|
||||||
WC(6,pz)*funcc(cxI(1)-2:cxI(1)+3,cxI(2)-2:cxI(2)+3,cxI(3)+3)
|
|
||||||
|
|
||||||
tmp1= WC(1,py)*tmp2(:,1)+WC(2,py)*tmp2(:,2)+WC(3,py)*tmp2(:,3)+&
|
|
||||||
WC(4,py)*tmp2(:,4)+WC(5,py)*tmp2(:,5)+WC(6,py)*tmp2(:,6)
|
|
||||||
|
|
||||||
funf(i,j,k)= WC(1,px)*tmp1(1)+WC(2,px)*tmp1(2)+WC(3,px)*tmp1(3)+&
|
|
||||||
WC(4,px)*tmp1(4)+WC(5,px)*tmp1(5)+WC(6,px)*tmp1(6)
|
|
||||||
#endif
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
|
maxcx = maxval(cix(imino:imaxo))
|
||||||
|
maxcy = maxval(ciy(jmino:jmaxo))
|
||||||
|
maxcz = maxval(ciz(kmino:kmaxo))
|
||||||
|
if(maxcx+3 > extc(1) .or. maxcy+3 > extc(2) .or. maxcz+3 > extc(3))then
|
||||||
|
write(*,*)"error in prolong"
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
call symmetry_bd(3,extc,func,funcc,SoA)
|
||||||
|
|
||||||
|
!~~~~~~> prolongation start...
|
||||||
|
! ~~~> 循环体开始 <~~~
|
||||||
|
#if 0
|
||||||
|
do k = kmino, kmaxo
|
||||||
|
pz = piz(k)
|
||||||
|
kc = ciz(k)
|
||||||
|
|
||||||
|
do j = jmino, jmaxo
|
||||||
|
py = piy(j)
|
||||||
|
jc = ciy(j)
|
||||||
|
|
||||||
|
! 1. 【降维:Z 向】对当前 (j,k) 相关的 6 条 Y 偏置线进行 Z 向插值
|
||||||
|
! 结果存入 tmp_yz(x_index, y_offset)
|
||||||
|
do jj = 1, 6
|
||||||
|
iy = jc - 3 + jj
|
||||||
|
do ii = 1, extc(1)
|
||||||
|
tmp_yz(ii, jj) = WC(1,pz)*funcc(ii, iy, kc-2) + &
|
||||||
|
WC(2,pz)*funcc(ii, iy, kc-1) + &
|
||||||
|
WC(3,pz)*funcc(ii, iy, kc ) + &
|
||||||
|
WC(4,pz)*funcc(ii, iy, kc+1) + &
|
||||||
|
WC(5,pz)*funcc(ii, iy, kc+2) + &
|
||||||
|
WC(6,pz)*funcc(ii, iy, kc+3)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
|
||||||
|
! 2. 【降维:Y 向】将 Z 向结果合并,得到整条 X 轴线上的 Y-Z 融合值
|
||||||
|
do ii = 1, extc(1)
|
||||||
|
tmp_xyz_line(ii) = WC(1,py)*tmp_yz(ii, 1) + WC(2,py)*tmp_yz(ii, 2) + &
|
||||||
|
WC(3,py)*tmp_yz(ii, 3) + WC(4,py)*tmp_yz(ii, 4) + &
|
||||||
|
WC(5,py)*tmp_yz(ii, 5) + WC(6,py)*tmp_yz(ii, 6)
|
||||||
|
end do
|
||||||
|
|
||||||
|
i = imino
|
||||||
|
do while (i < imaxo)
|
||||||
|
! 针对 i
|
||||||
|
ic = cix(i)
|
||||||
|
v_m2 = tmp_xyz_line(ic-2); v_m1 = tmp_xyz_line(ic-1)
|
||||||
|
v_0 = tmp_xyz_line(ic ); v_p1 = tmp_xyz_line(ic+1)
|
||||||
|
v_p2 = tmp_xyz_line(ic+2); v_p3 = tmp_xyz_line(ic+3)
|
||||||
|
|
||||||
|
! 这里的 WC(1:6, pix(i)) 被直接硬编码
|
||||||
|
! 注意:根据你的 pix 定义,1 和 2 分别对应 C1..C6 和 C6..C1
|
||||||
|
if (pix(i) == 2) then
|
||||||
|
funf(i, j, k) = C6*v_m2 + C5*v_m1 + C4*v_0 + C3*v_p1 + C2*v_p2 + C1*v_p3
|
||||||
|
funf(i+1, j, k) = C1*v_m2 + C2*v_m1 + C3*v_0 + C4*v_p1 + C5*v_p2 + C6*v_p3
|
||||||
|
else
|
||||||
|
funf(i, j, k) = C1*v_m2 + C2*v_m1 + C3*v_0 + C4*v_p1 + C5*v_p2 + C6*v_p3
|
||||||
|
funf(i+1, j, k) = C6*v_m2 + C5*v_m1 + C4*v_0 + C3*v_p1 + C2*v_p2 + C1*v_p3
|
||||||
|
end if
|
||||||
|
|
||||||
|
i = i + 2
|
||||||
|
end do
|
||||||
|
|
||||||
|
! 处理可能存在的最后一个孤立点
|
||||||
|
if (i == imaxo) then
|
||||||
|
ic = cix(i); px = pix(i)
|
||||||
|
funf(i,j,k) = WC(1,px)*tmp_xyz_line(ic-2) + WC(2,px)*tmp_xyz_line(ic-1) + &
|
||||||
|
WC(3,px)*tmp_xyz_line(ic ) + WC(4,px)*tmp_xyz_line(ic+1) + &
|
||||||
|
WC(5,px)*tmp_xyz_line(ic+2) + WC(6,px)*tmp_xyz_line(ic+3)
|
||||||
|
end if
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
do i = imino, imaxo
|
||||||
|
px = pix(i)
|
||||||
|
ic = cix(i)
|
||||||
|
|
||||||
|
! 直接从预计算好的 line 中读取连续的 6 个点
|
||||||
|
! ic-2 到 ic+3 对应原始 6 点算子
|
||||||
|
funf(i,j,k) = WC(1,px)*tmp_xyz_line(ic-2) + &
|
||||||
|
WC(2,px)*tmp_xyz_line(ic-1) + &
|
||||||
|
WC(3,px)*tmp_xyz_line(ic ) + &
|
||||||
|
WC(4,px)*tmp_xyz_line(ic+1) + &
|
||||||
|
WC(5,px)*tmp_xyz_line(ic+2) + &
|
||||||
|
WC(6,px)*tmp_xyz_line(ic+3)
|
||||||
|
end do
|
||||||
|
#endif
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
! ~~~> 修改后的核心循环块 <~~~
|
||||||
|
do k = kmino, kmaxo
|
||||||
|
pz = piz(k)
|
||||||
|
kc = ciz(k)
|
||||||
|
|
||||||
|
do j = jmino, jmaxo
|
||||||
|
py = piy(j)
|
||||||
|
jc = ciy(j)
|
||||||
|
|
||||||
|
! --- 步骤 1 & 2 融合:分段处理 X 轴,提升 Cache 命中率 ---
|
||||||
|
! 我们将 ii 循环逻辑重组,减少对 funcc 的跨行重复访问
|
||||||
|
do ii = 1, extc(1)
|
||||||
|
! 1. 先做 Z 方向的 6 条线插值(针对当前的 ii 和当前的 6 个 iy)
|
||||||
|
! 我们直接在这里把 Y 方向的加权也做了,省去 tmp_yz 数组
|
||||||
|
! 这样 funcc 的数据读进来后立即完成所有维度的贡献,不再写回内存
|
||||||
|
|
||||||
|
res_line = 0.0d0
|
||||||
|
do jj = 1, 6
|
||||||
|
iy = jc - 3 + jj
|
||||||
|
! 这一行代码是核心:一次性完成 Z 插值并加上 Y 的权重
|
||||||
|
! 编译器会把 WC(jj, py) 存在寄存器里
|
||||||
|
res_line = res_line + WC(jj, py) * ( &
|
||||||
|
WC(1, pz) * funcc(ii, iy, kc-2) + &
|
||||||
|
WC(2, pz) * funcc(ii, iy, kc-1) + &
|
||||||
|
WC(3, pz) * funcc(ii, iy, kc ) + &
|
||||||
|
WC(4, pz) * funcc(ii, iy, kc+1) + &
|
||||||
|
WC(5, pz) * funcc(ii, iy, kc+2) + &
|
||||||
|
WC(6, pz) * funcc(ii, iy, kc+3) )
|
||||||
|
end do
|
||||||
|
tmp_xyz_line(ii) = res_line
|
||||||
|
end do
|
||||||
|
|
||||||
|
! --- 步骤 3:X 向插值(保持你现在的双点展开,这部分已经很快了) ---
|
||||||
|
i = imino
|
||||||
|
do while (i < imaxo)
|
||||||
|
ic = cix(i)
|
||||||
|
v_m2 = tmp_xyz_line(ic-2); v_m1 = tmp_xyz_line(ic-1)
|
||||||
|
v_0 = tmp_xyz_line(ic ); v_p1 = tmp_xyz_line(ic+1)
|
||||||
|
v_p2 = tmp_xyz_line(ic+2); v_p3 = tmp_xyz_line(ic+3)
|
||||||
|
|
||||||
|
if (pix(i) == 2) then
|
||||||
|
funf(i, j, k) = C6*v_m2 + C5*v_m1 + C4*v_0 + C3*v_p1 + C2*v_p2 + C1*v_p3
|
||||||
|
funf(i+1, j, k) = C1*v_m2 + C2*v_m1 + C3*v_0 + C4*v_p1 + C5*v_p2 + C6*v_p3
|
||||||
|
else
|
||||||
|
funf(i, j, k) = C1*v_m2 + C2*v_m1 + C3*v_0 + C4*v_p1 + C5*v_p2 + C6*v_p3
|
||||||
|
funf(i+1, j, k) = C6*v_m2 + C5*v_m1 + C4*v_0 + C3*v_p1 + C2*v_p2 + C1*v_p3
|
||||||
|
end if
|
||||||
|
i = i + 2
|
||||||
|
end do
|
||||||
|
|
||||||
|
if (i == imaxo) then
|
||||||
|
ic = cix(i); px = pix(i)
|
||||||
|
funf(i,j,k) = WC(1,px)*tmp_xyz_line(ic-2) + WC(2,px)*tmp_xyz_line(ic-1) + &
|
||||||
|
WC(3,px)*tmp_xyz_line(ic ) + WC(4,px)*tmp_xyz_line(ic+1) + &
|
||||||
|
WC(5,px)*tmp_xyz_line(ic+2) + WC(6,px)*tmp_xyz_line(ic+3)
|
||||||
|
end if
|
||||||
|
end do
|
||||||
|
end do
|
||||||
return
|
return
|
||||||
|
|
||||||
end subroutine prolong3
|
end subroutine prolong3
|
||||||
|
|||||||
Reference in New Issue
Block a user