[TEST]UPSTREAM: Pick some source changes from 48080d0a97
* Sync new folder structure
This commit is contained in:
246
AMSS_NCKU_source/Runge_Kutta/rungekutta4_rout.f90
Normal file
246
AMSS_NCKU_source/Runge_Kutta/rungekutta4_rout.f90
Normal file
@@ -0,0 +1,246 @@
|
||||
!-----------------------------------------------------------------------------
|
||||
! $Id: rungekutta4_rout.f90,v 1.6 2012/12/26 11:47:43 zjcao Exp $
|
||||
! Carry out 4th-order Runge-Kutta method
|
||||
!-----------------------------------------------------------------------------
|
||||
! rk4 for scalar
|
||||
subroutine rungekutta4_scalar(dT,f0,f1,f_rhs,RK4)
|
||||
|
||||
implicit none
|
||||
|
||||
!~~~~~~% Input parameters:
|
||||
|
||||
integer ,intent(in):: RK4
|
||||
real*8 ,intent(in):: dT,f0
|
||||
real*8 ,intent(inout):: f1,f_rhs
|
||||
|
||||
!~~~~~~% Local parameter
|
||||
|
||||
real*8, parameter :: F1o6=1.d0/6.d0, HLF=5.d-1, TWO=2.d0
|
||||
|
||||
if( RK4 == 0 ) then
|
||||
|
||||
f1 = f0 + HLF * dT * f_rhs
|
||||
|
||||
elseif(RK4 == 1 ) then
|
||||
|
||||
f_rhs = f_rhs + TWO * f1
|
||||
f1 = f0 + HLF * dT * f1
|
||||
|
||||
elseif(RK4 == 2 ) then
|
||||
|
||||
f_rhs = f_rhs + TWO * f1
|
||||
f1 = f0 + dT * f1
|
||||
|
||||
elseif( RK4 == 3 ) then
|
||||
|
||||
f1 = f0 +F1o6 * dT *(f1 + f_rhs)
|
||||
|
||||
else
|
||||
|
||||
write(*,*) "rungekutta4_scalar: something is wrong in RK4 counting!!"
|
||||
stop
|
||||
|
||||
endif
|
||||
|
||||
return
|
||||
|
||||
end subroutine rungekutta4_scalar
|
||||
!~~~~~~~~~~~~~~~~~~
|
||||
! rk4 for complex scalar
|
||||
subroutine rungekutta4_cplxscalar(dT,f0,f1,f_rhs,RK4)
|
||||
|
||||
implicit none
|
||||
|
||||
!~~~~~~% Input parameters:
|
||||
|
||||
integer ,intent(in):: RK4
|
||||
real*8 ,intent(in):: dT
|
||||
double complex ,intent(in):: f0
|
||||
double complex ,intent(inout):: f1,f_rhs
|
||||
|
||||
!~~~~~~% Local parameter
|
||||
|
||||
real*8, parameter :: F1o6=1.d0/6.d0, HLF=5.d-1, TWO=2.d0
|
||||
|
||||
if( RK4 == 0 ) then
|
||||
|
||||
f1 = f0 + HLF * dT * f_rhs
|
||||
|
||||
elseif(RK4 == 1 ) then
|
||||
|
||||
f_rhs = f_rhs + TWO * f1
|
||||
f1 = f0 + HLF * dT * f1
|
||||
|
||||
elseif(RK4 == 2 ) then
|
||||
|
||||
f_rhs = f_rhs + TWO * f1
|
||||
f1 = f0 + dT * f1
|
||||
|
||||
elseif( RK4 == 3 ) then
|
||||
|
||||
f1 = f0 +F1o6 * dT *(f1 + f_rhs)
|
||||
|
||||
else
|
||||
|
||||
write(*,*) "rungekutta4_cplxscalar: something is wrong in RK4 counting!!"
|
||||
stop
|
||||
|
||||
endif
|
||||
|
||||
return
|
||||
|
||||
end subroutine rungekutta4_cplxscalar
|
||||
!~~~~~~~~~~~~~~~~~~
|
||||
subroutine rungekutta4_rout(ex,dT,f0,f1,f_rhs,RK4)
|
||||
|
||||
implicit none
|
||||
|
||||
!~~~~~~% Input parameters:
|
||||
|
||||
integer ,intent(in):: ex(1:3),RK4
|
||||
real*8 ,intent(in):: dT
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) ::f0
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) ::f_rhs
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) ::f1
|
||||
|
||||
!~~~~~~% Local parameter
|
||||
|
||||
real*8, parameter :: F1o6=1.d0/6.d0, HLF=5.d-1, TWO=2.d0
|
||||
|
||||
if( RK4 == 0 ) then
|
||||
|
||||
f1 = f0 + HLF * dT * f_rhs
|
||||
|
||||
elseif(RK4 == 1 ) then
|
||||
|
||||
f_rhs = f_rhs + TWO * f1
|
||||
|
||||
f1 = f0 + HLF * dT * f1
|
||||
|
||||
elseif(RK4 == 2 ) then
|
||||
|
||||
f_rhs = f_rhs + TWO * f1
|
||||
|
||||
f1 = f0 + dT * f1
|
||||
|
||||
elseif( RK4 == 3 ) then
|
||||
|
||||
f1 = f0 +F1o6 * dT *(f1 + f_rhs)
|
||||
|
||||
else
|
||||
|
||||
write(*,*) "rungekutta4_rout: something is wrong in RK4 counting!!"
|
||||
stop
|
||||
|
||||
endif
|
||||
|
||||
return
|
||||
|
||||
end subroutine rungekutta4_rout
|
||||
!-----------------------------------------------------------------------------
|
||||
! icn for scalar
|
||||
subroutine icn_scalar(dT,f0,f1,f_rhs,RK4)
|
||||
|
||||
implicit none
|
||||
|
||||
!~~~~~~% Input parameters:
|
||||
|
||||
integer ,intent(in):: RK4
|
||||
real*8 ,intent(in):: dT,f0
|
||||
real*8 ,intent(inout):: f1,f_rhs
|
||||
|
||||
!~~~~~~% Local parameter
|
||||
|
||||
real*8, parameter :: HLF=5.d-1
|
||||
|
||||
if( RK4 == 0 ) then
|
||||
|
||||
f1 = f0 + dT * f_rhs
|
||||
|
||||
else
|
||||
|
||||
f1 = f0 + HLF * dT * (f1+f_rhs)
|
||||
|
||||
endif
|
||||
|
||||
return
|
||||
|
||||
end subroutine icn_scalar
|
||||
!~~~~~~~~~~~~~~~~~~
|
||||
! icn for complex scalar
|
||||
subroutine icn_cplxscalar(dT,f0,f1,f_rhs,RK4)
|
||||
|
||||
implicit none
|
||||
|
||||
!~~~~~~% Input parameters:
|
||||
|
||||
integer ,intent(in):: RK4
|
||||
real*8 ,intent(in):: dT
|
||||
double complex ,intent(in):: f0
|
||||
double complex ,intent(inout):: f1,f_rhs
|
||||
|
||||
!~~~~~~% Local parameter
|
||||
|
||||
real*8, parameter :: HLF=5.d-1
|
||||
|
||||
if( RK4 == 0 ) then
|
||||
|
||||
f1 = f0 + dT * f_rhs
|
||||
|
||||
else
|
||||
|
||||
f1 = f0 + HLF * dT * (f1+f_rhs)
|
||||
|
||||
endif
|
||||
|
||||
return
|
||||
|
||||
end subroutine icn_cplxscalar
|
||||
!~~~~~~~~~~~~~~~~~~
|
||||
subroutine icn_rout(ex,dT,f0,f1,f_rhs,RK4)
|
||||
|
||||
implicit none
|
||||
|
||||
!~~~~~~% Input parameters:
|
||||
|
||||
integer ,intent(in):: ex(1:3),RK4
|
||||
real*8 ,intent(in):: dT
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) ::f0
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) ::f_rhs
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) ::f1
|
||||
|
||||
!~~~~~~% Local parameter
|
||||
|
||||
real*8, parameter :: HLF=5.d-1
|
||||
|
||||
if( RK4 == 0 ) then
|
||||
|
||||
f1 = f0 + dT * f_rhs
|
||||
|
||||
else
|
||||
|
||||
f1 = f0 + HLF * dT * (f1+f_rhs)
|
||||
|
||||
endif
|
||||
|
||||
return
|
||||
|
||||
end subroutine icn_rout
|
||||
!~~~~~~~~~~~~~~~~~~
|
||||
subroutine euler_rout(ex,dT,f0,f1,f_rhs)
|
||||
|
||||
implicit none
|
||||
|
||||
!~~~~~~% Input parameters:
|
||||
|
||||
integer ,intent(in):: ex(1:3)
|
||||
real*8 ,intent(in):: dT
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) ::f0
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) ::f_rhs
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(out) ::f1
|
||||
|
||||
f1 = f0 + dT * f_rhs
|
||||
|
||||
return
|
||||
|
||||
end subroutine euler_rout
|
||||
57
AMSS_NCKU_source/Runge_Kutta/rungekutta4_rout.h
Normal file
57
AMSS_NCKU_source/Runge_Kutta/rungekutta4_rout.h
Normal file
@@ -0,0 +1,57 @@
|
||||
|
||||
#ifndef RUNGEKUTTA4_H
|
||||
#define RUNGEKUTTA4_H
|
||||
|
||||
#ifdef fortran1
|
||||
#define f_euler_rout euler_rout
|
||||
#define f_rungekutta4_rout rungekutta4_rout
|
||||
#define f_rungekutta4_scalar rungekutta4_scalar
|
||||
#define f_icn_rout icn_rout
|
||||
#define f_icn_scalar icn_scalar
|
||||
#endif
|
||||
#ifdef fortran2
|
||||
#define f_euler_rout EULER_ROUT
|
||||
#define f_rungekutta4_rout RUNGEKUTTA4_ROUT
|
||||
#define f_rungekutta4_scalar RUNGEKUTTA4_SCALAR
|
||||
#define f_icn_rout ICN_ROUT
|
||||
#define f_icn_scalar ICN_SCALAR
|
||||
#endif
|
||||
#ifdef fortran3
|
||||
#define f_euler_rout euler_rout_
|
||||
#define f_rungekutta4_rout rungekutta4_rout_
|
||||
#define f_rungekutta4_scalar rungekutta4_scalar_
|
||||
#define f_icn_rout icn_rout_
|
||||
#define f_icn_scalar icn_scalar_
|
||||
#endif
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_rungekutta4_scalar(double &, double &, double &, double &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
int f_rungekutta4_rout(int *, double &,
|
||||
double *, double *, double *,
|
||||
int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void f_icn_scalar(double &, double &, double &, double &, int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
int f_icn_rout(int *, double &,
|
||||
double *, double *, double *,
|
||||
int &);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
int f_euler_rout(int *, double &,
|
||||
double *, double *, double *);
|
||||
}
|
||||
|
||||
#endif /* RUNGEKUTTA4_H */
|
||||
212
AMSS_NCKU_source/Runge_Kutta/rungekutta4_rout_c.C
Normal file
212
AMSS_NCKU_source/Runge_Kutta/rungekutta4_rout_c.C
Normal file
@@ -0,0 +1,212 @@
|
||||
#include "rungekutta4_rout.h"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstddef>
|
||||
#include <complex>
|
||||
#include <immintrin.h>
|
||||
|
||||
namespace {
|
||||
|
||||
inline void rk4_stage0(std::size_t n,
|
||||
const double *__restrict f0,
|
||||
const double *__restrict frhs,
|
||||
double *__restrict f1,
|
||||
double c) {
|
||||
std::size_t i = 0;
|
||||
#if defined(__AVX512F__)
|
||||
const __m512d vc = _mm512_set1_pd(c);
|
||||
for (; i + 7 < n; i += 8) {
|
||||
const __m512d v0 = _mm512_loadu_pd(f0 + i);
|
||||
const __m512d vr = _mm512_loadu_pd(frhs + i);
|
||||
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, vr, v0));
|
||||
}
|
||||
#elif defined(__AVX2__)
|
||||
const __m256d vc = _mm256_set1_pd(c);
|
||||
for (; i + 3 < n; i += 4) {
|
||||
const __m256d v0 = _mm256_loadu_pd(f0 + i);
|
||||
const __m256d vr = _mm256_loadu_pd(frhs + i);
|
||||
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, vr, v0));
|
||||
}
|
||||
#endif
|
||||
#pragma ivdep
|
||||
for (; i < n; ++i) {
|
||||
f1[i] = f0[i] + c * frhs[i];
|
||||
}
|
||||
}
|
||||
|
||||
inline void rk4_rhs_accum(std::size_t n,
|
||||
const double *__restrict f1,
|
||||
double *__restrict frhs) {
|
||||
std::size_t i = 0;
|
||||
#if defined(__AVX512F__)
|
||||
const __m512d v2 = _mm512_set1_pd(2.0);
|
||||
for (; i + 7 < n; i += 8) {
|
||||
const __m512d v1 = _mm512_loadu_pd(f1 + i);
|
||||
const __m512d vrhs = _mm512_loadu_pd(frhs + i);
|
||||
_mm512_storeu_pd(frhs + i, _mm512_fmadd_pd(v2, v1, vrhs));
|
||||
}
|
||||
#elif defined(__AVX2__)
|
||||
const __m256d v2 = _mm256_set1_pd(2.0);
|
||||
for (; i + 3 < n; i += 4) {
|
||||
const __m256d v1 = _mm256_loadu_pd(f1 + i);
|
||||
const __m256d vrhs = _mm256_loadu_pd(frhs + i);
|
||||
_mm256_storeu_pd(frhs + i, _mm256_fmadd_pd(v2, v1, vrhs));
|
||||
}
|
||||
#endif
|
||||
#pragma ivdep
|
||||
for (; i < n; ++i) {
|
||||
frhs[i] = frhs[i] + 2.0 * f1[i];
|
||||
}
|
||||
}
|
||||
|
||||
inline void rk4_f1_from_f0_f1(std::size_t n,
|
||||
const double *__restrict f0,
|
||||
double *__restrict f1,
|
||||
double c) {
|
||||
std::size_t i = 0;
|
||||
#if defined(__AVX512F__)
|
||||
const __m512d vc = _mm512_set1_pd(c);
|
||||
for (; i + 7 < n; i += 8) {
|
||||
const __m512d v0 = _mm512_loadu_pd(f0 + i);
|
||||
const __m512d v1 = _mm512_loadu_pd(f1 + i);
|
||||
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, v1, v0));
|
||||
}
|
||||
#elif defined(__AVX2__)
|
||||
const __m256d vc = _mm256_set1_pd(c);
|
||||
for (; i + 3 < n; i += 4) {
|
||||
const __m256d v0 = _mm256_loadu_pd(f0 + i);
|
||||
const __m256d v1 = _mm256_loadu_pd(f1 + i);
|
||||
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, v1, v0));
|
||||
}
|
||||
#endif
|
||||
#pragma ivdep
|
||||
for (; i < n; ++i) {
|
||||
f1[i] = f0[i] + c * f1[i];
|
||||
}
|
||||
}
|
||||
|
||||
inline void rk4_stage3(std::size_t n,
|
||||
const double *__restrict f0,
|
||||
double *__restrict f1,
|
||||
const double *__restrict frhs,
|
||||
double c) {
|
||||
std::size_t i = 0;
|
||||
#if defined(__AVX512F__)
|
||||
const __m512d vc = _mm512_set1_pd(c);
|
||||
for (; i + 7 < n; i += 8) {
|
||||
const __m512d v0 = _mm512_loadu_pd(f0 + i);
|
||||
const __m512d v1 = _mm512_loadu_pd(f1 + i);
|
||||
const __m512d vr = _mm512_loadu_pd(frhs + i);
|
||||
_mm512_storeu_pd(f1 + i, _mm512_fmadd_pd(vc, _mm512_add_pd(v1, vr), v0));
|
||||
}
|
||||
#elif defined(__AVX2__)
|
||||
const __m256d vc = _mm256_set1_pd(c);
|
||||
for (; i + 3 < n; i += 4) {
|
||||
const __m256d v0 = _mm256_loadu_pd(f0 + i);
|
||||
const __m256d v1 = _mm256_loadu_pd(f1 + i);
|
||||
const __m256d vr = _mm256_loadu_pd(frhs + i);
|
||||
_mm256_storeu_pd(f1 + i, _mm256_fmadd_pd(vc, _mm256_add_pd(v1, vr), v0));
|
||||
}
|
||||
#endif
|
||||
#pragma ivdep
|
||||
for (; i < n; ++i) {
|
||||
f1[i] = f0[i] + c * (f1[i] + frhs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
extern "C" {
|
||||
|
||||
void f_rungekutta4_scalar(double &dT, double &f0, double &f1, double &f_rhs, int &RK4) {
|
||||
constexpr double F1o6 = 1.0 / 6.0;
|
||||
constexpr double HLF = 0.5;
|
||||
constexpr double TWO = 2.0;
|
||||
|
||||
switch (RK4) {
|
||||
case 0:
|
||||
f1 = f0 + HLF * dT * f_rhs;
|
||||
break;
|
||||
case 1:
|
||||
f_rhs = f_rhs + TWO * f1;
|
||||
f1 = f0 + HLF * dT * f1;
|
||||
break;
|
||||
case 2:
|
||||
f_rhs = f_rhs + TWO * f1;
|
||||
f1 = f0 + dT * f1;
|
||||
break;
|
||||
case 3:
|
||||
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
|
||||
break;
|
||||
default:
|
||||
std::fprintf(stderr, "rungekutta4_scalar_c: invalid RK4 stage %d\n", RK4);
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
void rungekutta4_cplxscalar_(double &dT,
|
||||
std::complex<double> &f0,
|
||||
std::complex<double> &f1,
|
||||
std::complex<double> &f_rhs,
|
||||
int &RK4) {
|
||||
constexpr double F1o6 = 1.0 / 6.0;
|
||||
constexpr double HLF = 0.5;
|
||||
constexpr double TWO = 2.0;
|
||||
|
||||
switch (RK4) {
|
||||
case 0:
|
||||
f1 = f0 + HLF * dT * f_rhs;
|
||||
break;
|
||||
case 1:
|
||||
f_rhs = f_rhs + TWO * f1;
|
||||
f1 = f0 + HLF * dT * f1;
|
||||
break;
|
||||
case 2:
|
||||
f_rhs = f_rhs + TWO * f1;
|
||||
f1 = f0 + dT * f1;
|
||||
break;
|
||||
case 3:
|
||||
f1 = f0 + F1o6 * dT * (f1 + f_rhs);
|
||||
break;
|
||||
default:
|
||||
std::fprintf(stderr, "rungekutta4_cplxscalar_c: invalid RK4 stage %d\n", RK4);
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
int f_rungekutta4_rout(int *ex, double &dT,
|
||||
double *f0, double *f1, double *f_rhs,
|
||||
int &RK4) {
|
||||
const std::size_t n = static_cast<std::size_t>(ex[0]) *
|
||||
static_cast<std::size_t>(ex[1]) *
|
||||
static_cast<std::size_t>(ex[2]);
|
||||
const double *const __restrict f0r = f0;
|
||||
double *const __restrict f1r = f1;
|
||||
double *const __restrict frhs = f_rhs;
|
||||
|
||||
if (__builtin_expect(static_cast<unsigned>(RK4) > 3u, 0)) {
|
||||
std::fprintf(stderr, "rungekutta4_rout_c: invalid RK4 stage %d\n", RK4);
|
||||
std::abort();
|
||||
}
|
||||
|
||||
switch (RK4) {
|
||||
case 0:
|
||||
rk4_stage0(n, f0r, frhs, f1r, 0.5 * dT);
|
||||
break;
|
||||
case 1:
|
||||
rk4_rhs_accum(n, f1r, frhs);
|
||||
rk4_f1_from_f0_f1(n, f0r, f1r, 0.5 * dT);
|
||||
break;
|
||||
case 2:
|
||||
rk4_rhs_accum(n, f1r, frhs);
|
||||
rk4_f1_from_f0_f1(n, f0r, f1r, dT);
|
||||
break;
|
||||
default:
|
||||
rk4_stage3(n, f0r, f1r, frhs, (1.0 / 6.0) * dT);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
Reference in New Issue
Block a user