Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
This commit is contained in:
@@ -1,69 +1,9 @@
|
||||
XLEN ?= 32
|
||||
|
||||
RISCV_TOOLCHAIN_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops)
|
||||
POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc)
|
||||
POCL_INC_PATH ?= $(wildcard ../include)
|
||||
POCL_LIB_PATH ?= $(wildcard ../lib)
|
||||
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
VX_SIMX_PATH ?= $(wildcard ../../../simx/obj_dir)
|
||||
|
||||
CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
||||
DMP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
HEX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
GDB = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gdb
|
||||
|
||||
VX_SRCS = $(VORTEX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/io/vx_io.S $(VORTEX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/startup/vx_link$(XLEN).ld
|
||||
|
||||
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = $(VORTEX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT = cutcp
|
||||
|
||||
SRCS = main.cc args.c parboil_opencl.c ocl.c gpu_info.c cutoff.c cutcpu.c output.c readatom.c excl.c
|
||||
|
||||
all: $(PROJECT).dump $(PROJECT).hex
|
||||
CXXFLAGS += -I.
|
||||
|
||||
lib$(PROJECT).a: kernel.cl
|
||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOLCHAIN_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||
OPTS ?=
|
||||
|
||||
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||
|
||||
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||
|
||||
$(PROJECT).hex: $(PROJECT).elf
|
||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||
|
||||
$(PROJECT).dump: $(PROJECT).elf
|
||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||
|
||||
run: $(PROJECT).hex
|
||||
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E $(PROJECT).hex -s -b 1> emulator.debug
|
||||
|
||||
qemu: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOLCHAIN_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-s: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOLCHAIN_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-c: $(PROJECT).qemu
|
||||
$(GDB) $(PROJECT).qemu
|
||||
|
||||
clean:
|
||||
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||
include ../common.mk
|
||||
|
||||
@@ -19,6 +19,27 @@
|
||||
#include "macros.h"
|
||||
#include "ocl.h"
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (NULL == filename || NULL == data || 0 == size)
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// OpenCL 1.1 support for int3 is not uniform on all implementations, so
|
||||
// we use int4 instead. Only the 'x', 'y', and 'z' fields of xyz are used.
|
||||
typedef cl_int4 xyz;
|
||||
@@ -294,8 +315,6 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("Ok!\n");
|
||||
|
||||
pb_Context* pb_context;
|
||||
pb_context = pb_InitOpenCLContext(parameters);
|
||||
if (pb_context == NULL) {
|
||||
@@ -303,8 +322,6 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Ok!\n");
|
||||
|
||||
cl_int clStatus;
|
||||
cl_device_id clDevice = (cl_device_id) pb_context->clDeviceId;
|
||||
cl_platform_id clPlatform = (cl_platform_id) pb_context->clPlatformId;
|
||||
@@ -317,8 +334,13 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
|
||||
//const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
|
||||
//cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
|
||||
cl_program clProgram = clCreateProgramWithBuiltInKernels(
|
||||
clContext, 1, &clDevice, "opencl_cutoff_potential_lattice", &clStatus);
|
||||
uint8_t *kernel_bin = NULL;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
clStatus = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
|
||||
CHECK_ERROR("read_kernel_file")
|
||||
cl_program clProgram = clCreateProgramWithBinary(
|
||||
clContext, 1, &clDevice, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &clStatus);
|
||||
CHECK_ERROR("clCreateProgramWithSource")
|
||||
|
||||
char clOptions[50];
|
||||
@@ -399,9 +421,6 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
clStatus = clSetKernelArg(clKernel,10,sizeof(cl_mem),&NbrList);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
printf("Ok!!\n");
|
||||
|
||||
|
||||
/* loop over z-dimension, invoke OpenCL kernel for each x-y plane */
|
||||
pb_SwitchToTimer(timers, pb_TimerID_KERNEL);
|
||||
printf("Invoking OpenCL kernel on %d region planes...\n", zRegionDim);
|
||||
@@ -412,26 +431,16 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
clStatus = clSetKernelArg(clKernel,8,sizeof(int),&zRegionIndex);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
printf("Ok**!2\n");
|
||||
|
||||
clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,3,NULL,gridDim,blockDim,0,NULL,NULL);
|
||||
|
||||
printf("Ok**!2\n");
|
||||
|
||||
CHECK_ERROR("clEnqueueNDRangeKernel")
|
||||
|
||||
printf("Ok**!2\n");
|
||||
|
||||
clStatus = clFinish(clCommandQueue);
|
||||
|
||||
printf("Ok**!2\n");
|
||||
|
||||
CHECK_ERROR("clFinish")
|
||||
}
|
||||
|
||||
printf("Ok++!\n");
|
||||
|
||||
printf("Finished OpenCL kernel calls \n");
|
||||
printf("Finished OpenCL kernel calls\n");
|
||||
|
||||
/* copy result regions from OpenCL device */
|
||||
pb_SwitchToTimer(timers, pb_TimerID_COPY);
|
||||
|
||||
@@ -9,6 +9,10 @@
|
||||
#ifndef __GPUINFOH__
|
||||
#define __GPUINFOH__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void compute_active_thread(size_t *thread,
|
||||
size_t *grid,
|
||||
int task,
|
||||
@@ -17,4 +21,8 @@ void compute_active_thread(size_t *thread,
|
||||
int minor,
|
||||
int sm);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
Binary file not shown.
@@ -124,8 +124,6 @@ int main(int argc, char *argv[]) {
|
||||
pb_InitializeTimerSet(&timers);
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_IO);
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
{
|
||||
const char *pqrfilename = parameters->inpFiles[0];
|
||||
|
||||
@@ -136,8 +134,6 @@ int main(int argc, char *argv[]) {
|
||||
printf("read %d atoms from file '%s'\n", atom->size, pqrfilename);
|
||||
}
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
/* find extent of domain */
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
get_atom_extent(&min_ext, &max_ext, atom);
|
||||
|
||||
@@ -3,6 +3,10 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void clMemSet(cl_command_queue, cl_mem, int, size_t);
|
||||
char* readFile(const char*);
|
||||
|
||||
@@ -14,4 +18,8 @@ char* readFile(const char*);
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user