diff --git a/AMSS_NCKU_source/enforce_algebra.f90 b/AMSS_NCKU_source/enforce_algebra.f90 index 71f3da2..2a511a5 100644 --- a/AMSS_NCKU_source/enforce_algebra.f90 +++ b/AMSS_NCKU_source/enforce_algebra.f90 @@ -18,49 +18,61 @@ real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz !~~~~~~~> Local variable: - - real*8, dimension(ex(1),ex(2),ex(3)) :: trA,detg - real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz - real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz + + integer :: i,j,k + real*8 :: lgxx,lgyy,lgzz,ldetg + real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz + real*8 :: ltrA,lscale real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0 !~~~~~~> - gxx = dxx + ONE - gyy = dyy + ONE - gzz = dzz + ONE + do k=1,ex(3) + do j=1,ex(2) + do i=1,ex(1) - detg = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - & - gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz - gupxx = ( gyy * gzz - gyz * gyz ) / detg - gupxy = - ( gxy * gzz - gyz * gxz ) / detg - gupxz = ( gxy * gyz - gyy * gxz ) / detg - gupyy = ( gxx * gzz - gxz * gxz ) / detg - gupyz = - ( gxx * gyz - gxy * gxz ) / detg - gupzz = ( gxx * gyy - gxy * gxy ) / detg + lgxx = dxx(i,j,k) + ONE + lgyy = dyy(i,j,k) + ONE + lgzz = dzz(i,j,k) + ONE - trA = gupxx * Axx + gupyy * Ayy + gupzz * Azz & - + TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz) + ldetg = lgxx * lgyy * lgzz & + + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) & + + gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) & + - gxz(i,j,k) * lgyy * gxz(i,j,k) & + - gxy(i,j,k) * gxy(i,j,k) * lgzz & + - lgxx * gyz(i,j,k) * gyz(i,j,k) - Axx = Axx - F1o3 * gxx * trA - Axy = Axy - F1o3 * gxy * trA - Axz = Axz - F1o3 * gxz * trA - Ayy = Ayy - F1o3 * gyy * trA - Ayz = Ayz - F1o3 * gyz * trA - Azz = Azz - F1o3 * gzz * trA + lgupxx = ( lgyy * lgzz - gyz(i,j,k) * gyz(i,j,k) ) / ldetg + lgupxy = - ( gxy(i,j,k) * lgzz - gyz(i,j,k) * gxz(i,j,k) ) / ldetg + lgupxz = ( gxy(i,j,k) * gyz(i,j,k) - lgyy * gxz(i,j,k) ) / ldetg + lgupyy = ( lgxx * lgzz - gxz(i,j,k) * gxz(i,j,k) ) / ldetg + lgupyz = - ( lgxx * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / ldetg + lgupzz = ( lgxx * lgyy - gxy(i,j,k) * gxy(i,j,k) ) / ldetg - detg = ONE / ( detg ** F1o3 ) - - gxx = gxx * detg - gxy = gxy * detg - gxz = gxz * detg - gyy = gyy * detg - gyz = gyz * detg - gzz = gzz * detg + ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) & + + lgupzz * Azz(i,j,k) & + + TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) & + + lgupyz * Ayz(i,j,k)) - dxx = gxx - ONE - dyy = gyy - ONE - dzz = gzz - ONE + Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA + Axy(i,j,k) = Axy(i,j,k) - F1o3 * gxy(i,j,k) * ltrA + Axz(i,j,k) = Axz(i,j,k) - F1o3 * gxz(i,j,k) * ltrA + Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA + Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * gyz(i,j,k) * ltrA + Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA + + lscale = ONE / ( ldetg ** F1o3 ) + + dxx(i,j,k) = lgxx * lscale - ONE + gxy(i,j,k) = gxy(i,j,k) * lscale + gxz(i,j,k) = gxz(i,j,k) * lscale + dyy(i,j,k) = lgyy * lscale - ONE + gyz(i,j,k) = gyz(i,j,k) * lscale + dzz(i,j,k) = lgzz * lscale - ONE + + enddo + enddo + enddo return @@ -82,51 +94,71 @@ real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz !~~~~~~~> Local variable: - - real*8, dimension(ex(1),ex(2),ex(3)) :: trA - real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz - real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz + + integer :: i,j,k + real*8 :: lgxx,lgyy,lgzz,lscale + real*8 :: lgxy,lgxz,lgyz + real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz + real*8 :: ltrA real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0 !~~~~~~> - gxx = dxx + ONE - gyy = dyy + ONE - gzz = dzz + ONE -! for g - gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - & - gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz + do k=1,ex(3) + do j=1,ex(2) + do i=1,ex(1) - gupzz = ONE / ( gupzz ** F1o3 ) - - gxx = gxx * gupzz - gxy = gxy * gupzz - gxz = gxz * gupzz - gyy = gyy * gupzz - gyz = gyz * gupzz - gzz = gzz * gupzz +! for g: normalize determinant first + lgxx = dxx(i,j,k) + ONE + lgyy = dyy(i,j,k) + ONE + lgzz = dzz(i,j,k) + ONE + lgxy = gxy(i,j,k) + lgxz = gxz(i,j,k) + lgyz = gyz(i,j,k) - dxx = gxx - ONE - dyy = gyy - ONE - dzz = gzz - ONE -! for A + lscale = lgxx * lgyy * lgzz + lgxy * lgyz * lgxz & + + lgxz * lgxy * lgyz - lgxz * lgyy * lgxz & + - lgxy * lgxy * lgzz - lgxx * lgyz * lgyz - gupxx = ( gyy * gzz - gyz * gyz ) - gupxy = - ( gxy * gzz - gyz * gxz ) - gupxz = ( gxy * gyz - gyy * gxz ) - gupyy = ( gxx * gzz - gxz * gxz ) - gupyz = - ( gxx * gyz - gxy * gxz ) - gupzz = ( gxx * gyy - gxy * gxy ) + lscale = ONE / ( lscale ** F1o3 ) - trA = gupxx * Axx + gupyy * Ayy + gupzz * Azz & - + TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz) + lgxx = lgxx * lscale + lgxy = lgxy * lscale + lgxz = lgxz * lscale + lgyy = lgyy * lscale + lgyz = lgyz * lscale + lgzz = lgzz * lscale - Axx = Axx - F1o3 * gxx * trA - Axy = Axy - F1o3 * gxy * trA - Axz = Axz - F1o3 * gxz * trA - Ayy = Ayy - F1o3 * gyy * trA - Ayz = Ayz - F1o3 * gyz * trA - Azz = Azz - F1o3 * gzz * trA + dxx(i,j,k) = lgxx - ONE + gxy(i,j,k) = lgxy + gxz(i,j,k) = lgxz + dyy(i,j,k) = lgyy - ONE + gyz(i,j,k) = lgyz + dzz(i,j,k) = lgzz - ONE + +! for A: trace-free using normalized metric (det=1, no division needed) + lgupxx = ( lgyy * lgzz - lgyz * lgyz ) + lgupxy = - ( lgxy * lgzz - lgyz * lgxz ) + lgupxz = ( lgxy * lgyz - lgyy * lgxz ) + lgupyy = ( lgxx * lgzz - lgxz * lgxz ) + lgupyz = - ( lgxx * lgyz - lgxy * lgxz ) + lgupzz = ( lgxx * lgyy - lgxy * lgxy ) + + ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) & + + lgupzz * Azz(i,j,k) & + + TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) & + + lgupyz * Ayz(i,j,k)) + + Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA + Axy(i,j,k) = Axy(i,j,k) - F1o3 * lgxy * ltrA + Axz(i,j,k) = Axz(i,j,k) - F1o3 * lgxz * ltrA + Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA + Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * lgyz * ltrA + Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA + + enddo + enddo + enddo return diff --git a/AMSS_NCKU_source/fmisc.f90 b/AMSS_NCKU_source/fmisc.f90 index b266a44..0feed47 100644 --- a/AMSS_NCKU_source/fmisc.f90 +++ b/AMSS_NCKU_source/fmisc.f90 @@ -324,7 +324,6 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA) integer::i - funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func do i=0,ord-1 funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) @@ -350,7 +349,6 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA) integer::i - funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func do i=0,ord-1 funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) @@ -379,7 +377,6 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA) integer::i - funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func do i=0,ord-1 funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) @@ -886,7 +883,6 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA) integer::i - funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func do i=0,ord-1 funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) @@ -912,7 +908,6 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA) integer::i - funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func do i=0,ord-1 funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) @@ -941,7 +936,6 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA) integer::i - funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func do i=0,ord-1 funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) diff --git a/AMSS_NCKU_source/makefile.inc b/AMSS_NCKU_source/makefile.inc index 8068ef3..489bbce 100755 --- a/AMSS_NCKU_source/makefile.inc +++ b/AMSS_NCKU_source/makefile.inc @@ -16,10 +16,10 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore ## -fp-model fast=2: Aggressive floating-point optimizations ## -fma: Enable fused multiply-add instructions ## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues -CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma \ +CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ -Dfortran3 -Dnewc -I${MKLROOT}/include -f90appflags = -O3 -xHost -fp-model fast=2 -fma \ - -fpp -I${MKLROOT}/include +f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ + -align array64byte -fpp -I${MKLROOT}/include f90 = ifx f77 = ifx CXX = icpx