Switch build system from Intel oneAPI to GCC + OpenMPI
- Replace compilers: ifx→gfortran, icx→gcc, icpx→g++, mpiicpx→mpicxx - Replace flags: -xHost→-march=x86-64-v4, -ipo→-flto, -fpp→-cpp - Replace flags: -fp-model fast=2→-ffast-math, -fma→-mfma - Replace flags: -qopenmp→-fopenmp - Remove Intel-specific: -align array64byte, -liomp5, -lifcore, -limf - Switch MKL interface: -lmkl_intel_lp64→-lmkl_gf_lp64 (gfortran) - Replace TBB malloc with optional jemalloc (default off) - Disable PGO entirely (was already marked negative optimization) - TwoPunctureABE and ABE both verified to build successfully Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
include makefile.inc
|
||||
|
||||
-include AMSS_NCKU_build.mk
|
||||
@@ -40,58 +40,41 @@ POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
||||
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
|
||||
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
|
||||
|
||||
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
|
||||
## make -> opt (PGO-guided, maximum performance)
|
||||
## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data)
|
||||
PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
|
||||
|
||||
ifeq ($(PGO_MODE),instrument)
|
||||
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
|
||||
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
## GCC build flags (optimized for x86-64-v4)
|
||||
## PGO disabled (used negative optimization on Intel; not tested on GCC)
|
||||
CXXAPPFLAGS = -O3 -march=x86-64-v4 -ffast-math -mfma -flto \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
|
||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
else
|
||||
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
||||
## PGO has been turned off, now tested and found to be negative optimization
|
||||
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
|
||||
f90appflags = -O3 -march=x86-64-v4 -ffast-math -mfma -flto \
|
||||
-cpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
|
||||
.SUFFIXES: .o .f90 .C .for .cu
|
||||
|
||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
|
||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
endif
|
||||
|
||||
.SUFFIXES: .o .f90 .C .for .cu
|
||||
|
||||
.f90.o:
|
||||
$(f90) $(f90appflags) -c $< -o $@
|
||||
|
||||
.C.o:
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
.for.o:
|
||||
$(f77) -c $< -o $@
|
||||
|
||||
.cu.o:
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# C rewrite of BSSN RHS kernel and helpers
|
||||
.f90.o:
|
||||
$(f90) $(f90appflags) -c $< -o $@
|
||||
|
||||
.C.o:
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
.for.o:
|
||||
$(f77) -c $< -o $@
|
||||
|
||||
.cu.o:
|
||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||
|
||||
# C rewrite of BSSN RHS kernel and helpers
|
||||
bssn_rhs_c.o: bssn_rhs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
fderivs_c.o: fderivs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
fdderivs_c.o: fdderivs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
kodiss_c.o: kodiss_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
|
||||
fdderivs_c.o: fdderivs_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
kodiss_c.o: kodiss_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
lopsided_c.o: lopsided_c.C
|
||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
@@ -103,22 +86,20 @@ z4c_rhs_c.o: z4c_rhs_c.C
|
||||
|
||||
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
||||
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||
|
||||
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
||||
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
||||
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-fprofile-instr-use=$(TP_PROFDATA) \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||
|
||||
TwoPunctures.o: TwoPunctures.C
|
||||
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
||||
|
||||
TwoPunctureABE.o: TwoPunctureABE.C
|
||||
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
||||
|
||||
# Input files
|
||||
|
||||
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
|
||||
|
||||
## TwoPunctureABE flags (no PGO; PGO was negative optimization)
|
||||
TP_OPTFLAGS = -O3 -march=x86-64-v4 -ffast-math -mfma -flto \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||
|
||||
TwoPunctures.o: TwoPunctures.C
|
||||
${CXX} $(TP_OPTFLAGS) -fopenmp -c $< -o $@
|
||||
|
||||
TwoPunctureABE.o: TwoPunctureABE.C
|
||||
${CXX} $(TP_OPTFLAGS) -fopenmp -c $< -o $@
|
||||
|
||||
# Input files
|
||||
|
||||
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
|
||||
ifeq ($(USE_CXX_KERNELS),0)
|
||||
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
||||
CFILES =
|
||||
@@ -144,22 +125,22 @@ RK4_F90_OBJ =
|
||||
else
|
||||
RK4_F90_OBJ = rungekutta4_rout.o
|
||||
endif
|
||||
|
||||
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
||||
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
||||
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
||||
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
||||
NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o
|
||||
|
||||
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
cgh.o surface_integral.o ShellPatch.o\
|
||||
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
||||
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
||||
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
||||
NullShellPatch2_Evo.o \
|
||||
bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o
|
||||
|
||||
|
||||
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
||||
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
||||
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
||||
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
||||
NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o
|
||||
|
||||
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||
cgh.o surface_integral.o ShellPatch.o\
|
||||
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
||||
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
||||
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
||||
NullShellPatch2_Evo.o \
|
||||
bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o
|
||||
|
||||
F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
||||
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
||||
$(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\
|
||||
@@ -170,69 +151,69 @@ F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
||||
cpbc.o getnp4old.o NullEvol.o initial_null.o initial_maxwell.o\
|
||||
getnpem2.o empart.o NullNews.o fourdcurvature.o\
|
||||
bssn2adm.o adm_constraint.o adm_ricci_gamma.o\
|
||||
scalar_rhs.o initial_scalar.o NullEvol2.o initial_null2.o\
|
||||
NullNews2.o tool_f.o
|
||||
|
||||
ifeq ($(USE_CXX_KERNELS),0)
|
||||
# Fortran mode: include original bssn_rhs.o
|
||||
F90FILES = $(F90FILES_BASE) bssn_rhs.o
|
||||
else
|
||||
# C++ mode (default): bssn_rhs.o replaced by C++ kernel
|
||||
F90FILES = $(F90FILES_BASE)
|
||||
endif
|
||||
|
||||
F77FILES = zbesh.o
|
||||
|
||||
AHFDOBJS = expansion.o expansion_Jacobian.o patch.o coords.o patch_info.o patch_interp.o patch_system.o \
|
||||
tgrid.o fd_grid.o ghost_zone.o array.o round.o norm.o fuzzy.o error_exit.o miscfp.o \
|
||||
linear_map.o cpm_map.o BH_diagnostics.o setup.o horizon_sequence.o find_horizons.o \
|
||||
initial_guess.o Newton.o Jacobian.o ilucg.o IntPnts0.o IntPnts.o
|
||||
|
||||
TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o
|
||||
|
||||
CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
|
||||
|
||||
# file dependences
|
||||
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
||||
|
||||
$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||
rungekutta4_rout.h var.h bssn_class.h bssn_rhs.h sommerfeld_rout.h\
|
||||
cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\
|
||||
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
|
||||
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
|
||||
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
|
||||
initial_null2.h NullShellPatch2.h
|
||||
|
||||
$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||
rungekutta4_rout.h var.h bssn_rhs.h sommerfeld_rout.h\
|
||||
cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\
|
||||
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
|
||||
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
|
||||
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
|
||||
initial_null2.h NullShellPatch2.h \
|
||||
bssn_gpu_class.h bssn_macro.h
|
||||
|
||||
$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h
|
||||
|
||||
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
||||
|
||||
TwoPunctureFILES: TwoPunctures.h
|
||||
|
||||
$(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h
|
||||
|
||||
misc.o : zbesh.o
|
||||
|
||||
# projects
|
||||
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
|
||||
|
||||
ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||
|
||||
TwoPunctureABE: $(TwoPunctureFILES)
|
||||
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||
|
||||
clean:
|
||||
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
||||
scalar_rhs.o initial_scalar.o NullEvol2.o initial_null2.o\
|
||||
NullNews2.o tool_f.o
|
||||
|
||||
ifeq ($(USE_CXX_KERNELS),0)
|
||||
# Fortran mode: include original bssn_rhs.o
|
||||
F90FILES = $(F90FILES_BASE) bssn_rhs.o
|
||||
else
|
||||
# C++ mode (default): bssn_rhs.o replaced by C++ kernel
|
||||
F90FILES = $(F90FILES_BASE)
|
||||
endif
|
||||
|
||||
F77FILES = zbesh.o
|
||||
|
||||
AHFDOBJS = expansion.o expansion_Jacobian.o patch.o coords.o patch_info.o patch_interp.o patch_system.o \
|
||||
tgrid.o fd_grid.o ghost_zone.o array.o round.o norm.o fuzzy.o error_exit.o miscfp.o \
|
||||
linear_map.o cpm_map.o BH_diagnostics.o setup.o horizon_sequence.o find_horizons.o \
|
||||
initial_guess.o Newton.o Jacobian.o ilucg.o IntPnts0.o IntPnts.o
|
||||
|
||||
TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o
|
||||
|
||||
CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
|
||||
|
||||
# file dependences
|
||||
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
||||
|
||||
$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||
rungekutta4_rout.h var.h bssn_class.h bssn_rhs.h sommerfeld_rout.h\
|
||||
cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\
|
||||
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
|
||||
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
|
||||
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
|
||||
initial_null2.h NullShellPatch2.h
|
||||
|
||||
$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||
rungekutta4_rout.h var.h bssn_rhs.h sommerfeld_rout.h\
|
||||
cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\
|
||||
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
|
||||
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
|
||||
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
|
||||
initial_null2.h NullShellPatch2.h \
|
||||
bssn_gpu_class.h bssn_macro.h
|
||||
|
||||
$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h
|
||||
|
||||
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
||||
|
||||
TwoPunctureFILES: TwoPunctures.h
|
||||
|
||||
$(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h
|
||||
|
||||
misc.o : zbesh.o
|
||||
|
||||
# projects
|
||||
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
|
||||
|
||||
ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||
|
||||
TwoPunctureABE: $(TwoPunctureFILES)
|
||||
$(CLINKER) $(TP_OPTFLAGS) -fopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||
|
||||
clean:
|
||||
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
||||
|
||||
@@ -1,33 +1,16 @@
|
||||
## GCC version (commented out)
|
||||
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
||||
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
||||
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
|
||||
|
||||
## Intel oneAPI version with oneMKL (Optimized for performance)
|
||||
## GCC version with OpenMPI and oneMKL
|
||||
filein = -I/usr/include/ -I${MKLROOT}/include
|
||||
|
||||
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
|
||||
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
||||
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
|
||||
## Using MKL with gfortran interface (-lmkl_gf_lp64 instead of -lmkl_intel_lp64)
|
||||
LDLIBS = -L${MKLROOT}/lib -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lgfortran -lpthread -lm -ldl -lgomp
|
||||
|
||||
## Memory allocator switch
|
||||
## 1 (default) : link Intel oneTBB allocator (libtbbmalloc)
|
||||
## 0 : use system default allocator (ptmalloc)
|
||||
USE_TBBMALLOC ?= 1
|
||||
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
|
||||
ifneq ($(wildcard $(TBBMALLOC_SO)),)
|
||||
TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
|
||||
else
|
||||
TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
|
||||
## 0 (default) : use system default allocator (ptmalloc)
|
||||
## 1 : use jemalloc (install jemalloc-devel first)
|
||||
USE_JEMALLOC ?= 0
|
||||
ifeq ($(USE_JEMALLOC),1)
|
||||
LDLIBS := -ljemalloc $(LDLIBS)
|
||||
endif
|
||||
ifeq ($(USE_TBBMALLOC),1)
|
||||
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
|
||||
endif
|
||||
|
||||
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
|
||||
## opt : (default) maximum performance with PGO profile-guided optimization
|
||||
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
|
||||
PGO_MODE ?= opt
|
||||
|
||||
## Interp_Points load balance profiling mode
|
||||
## off : (default) no load balance instrumentation
|
||||
@@ -70,13 +53,12 @@ USE_TRANSFER_CACHE ?= auto
|
||||
## 0 : use original Fortran rungekutta4_rout.o
|
||||
USE_CXX_RK4 ?= 1
|
||||
|
||||
f90 = ifx
|
||||
f77 = ifx
|
||||
CXX = icpx
|
||||
CC = icx
|
||||
CLINKER = mpiicpx
|
||||
f90 = gfortran
|
||||
f77 = gfortran
|
||||
CXX = g++
|
||||
CC = gcc
|
||||
CLINKER = mpicxx
|
||||
|
||||
Cu = nvcc
|
||||
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
||||
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
||||
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|
||||
|
||||
Reference in New Issue
Block a user