Add runnable GPU main-path prototype

2026-04-08 19:14:37 +08:00
parent 8c1f4d8108
commit ea470737db
10 changed files with 1068 additions and 39 deletions
--- a/AMSS_NCKU_source/makefile.inc
+++ b/AMSS_NCKU_source/makefile.inc
@@ -9,6 +9,10 @@ filein  = -I/usr/include/ -I${MKLROOT}/include
 ## Using sequential MKL (OpenMP disabled for better single-threaded performance)
 ## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
 LDLIBS  = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
+## CUDA runtime (libcudart) for the GPU main-path prototype.
+## CUDA_HOME is the toolkit root; override it with `make CUDA_HOME=/path/to/cuda` or from the environment.
+CUDA_HOME ?= /usr/local/cuda-12.9
+CUDA_LDLIBS = -L$(CUDA_HOME)/targets/x86_64-linux/lib -lcudart

 ## Memory allocator switch
 ##   1 (default) : link Intel oneTBB allocator (libtbbmalloc)
@@ -24,6 +28,9 @@ ifeq ($(USE_TBBMALLOC),1)
 LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
 endif

+## Prepend the CUDA runtime libraries to LDLIBS (unconditional: libcudart is then required at link time).
+LDLIBS := $(CUDA_LDLIBS) $(LDLIBS)
+
 ## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
 ##   opt        : (default) maximum performance with PGO profile-guided optimization
 ##   instrument : PGO Phase 1 instrumentation to collect fresh profile data