Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactoring minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
This commit is contained in:
@@ -1,69 +1,9 @@
|
||||
XLEN ?= 32
|
||||
|
||||
RISCV_TOOLCHAIN_PATH ?= $(wildcard ../../../../riscv-gnu-toolchain/drops)
|
||||
POCL_CC_PATH ?= $(wildcard ../../../../pocl/drops_riscv_cc)
|
||||
POCL_INC_PATH ?= $(wildcard ../include)
|
||||
POCL_LIB_PATH ?= $(wildcard ../lib)
|
||||
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
VX_SIMX_PATH ?= $(wildcard ../../../simx/obj_dir)
|
||||
|
||||
CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
||||
DMP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
HEX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
GDB = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gdb
|
||||
|
||||
VX_SRCS = $(VORTEX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/io/vx_io.S $(VORTEX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VORTEX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/startup/vx_link$(XLEN).ld
|
||||
|
||||
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
|
||||
CXXFLAGS += -ffreestanding # program may not begin at main()
|
||||
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
|
||||
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
|
||||
CXXFLAGS += -I$(POCL_INC_PATH) -I.
|
||||
|
||||
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
QEMU_LIBS = $(VORTEX_RT_PATH)/qemu/vx_api.c -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
|
||||
|
||||
PROJECT = cutcp
|
||||
|
||||
SRCS = main.cc args.c parboil_opencl.c ocl.c gpu_info.c cutoff.c cutcpu.c output.c readatom.c excl.c
|
||||
|
||||
all: $(PROJECT).dump $(PROJECT).hex
|
||||
CXXFLAGS += -I.
|
||||
|
||||
lib$(PROJECT).a: kernel.cl
|
||||
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOLCHAIN_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
|
||||
OPTS ?=
|
||||
|
||||
$(PROJECT).elf: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf
|
||||
|
||||
$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu
|
||||
|
||||
$(PROJECT).hex: $(PROJECT).elf
|
||||
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
|
||||
|
||||
$(PROJECT).dump: $(PROJECT).elf
|
||||
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
|
||||
|
||||
run: $(PROJECT).hex
|
||||
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E $(PROJECT).hex -s -b 1> emulator.debug
|
||||
|
||||
qemu: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOLCHAIN_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-s: $(PROJECT).qemu
|
||||
POCL_DEBUG=all $(RISCV_TOOLCHAIN_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
|
||||
|
||||
gdb-c: $(PROJECT).qemu
|
||||
$(GDB) $(PROJECT).qemu
|
||||
|
||||
clean:
|
||||
rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug
|
||||
include ../common.mk
|
||||
|
||||
@@ -19,6 +19,27 @@
|
||||
#include "macros.h"
|
||||
#include "ocl.h"
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (NULL == filename || NULL == data || 0 == size)
|
||||
return CL_INVALID_VALUE;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// OpenCL 1.1 support for int3 is not uniform on all implementations, so
|
||||
// we use int4 instead. Only the 'x', 'y', and 'z' fields of xyz are used.
|
||||
typedef cl_int4 xyz;
|
||||
@@ -294,8 +315,6 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("Ok!\n");
|
||||
|
||||
pb_Context* pb_context;
|
||||
pb_context = pb_InitOpenCLContext(parameters);
|
||||
if (pb_context == NULL) {
|
||||
@@ -303,8 +322,6 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Ok!\n");
|
||||
|
||||
cl_int clStatus;
|
||||
cl_device_id clDevice = (cl_device_id) pb_context->clDeviceId;
|
||||
cl_platform_id clPlatform = (cl_platform_id) pb_context->clPlatformId;
|
||||
@@ -317,8 +334,13 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
|
||||
//const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
|
||||
//cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
|
||||
cl_program clProgram = clCreateProgramWithBuiltInKernels(
|
||||
clContext, 1, &clDevice, "opencl_cutoff_potential_lattice", &clStatus);
|
||||
uint8_t *kernel_bin = NULL;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
clStatus = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
|
||||
CHECK_ERROR("read_kernel_file")
|
||||
cl_program clProgram = clCreateProgramWithBinary(
|
||||
clContext, 1, &clDevice, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &clStatus);
|
||||
CHECK_ERROR("clCreateProgramWithSource")
|
||||
|
||||
char clOptions[50];
|
||||
@@ -399,9 +421,6 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
clStatus = clSetKernelArg(clKernel,10,sizeof(cl_mem),&NbrList);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
printf("Ok!!\n");
|
||||
|
||||
|
||||
/* loop over z-dimension, invoke OpenCL kernel for each x-y plane */
|
||||
pb_SwitchToTimer(timers, pb_TimerID_KERNEL);
|
||||
printf("Invoking OpenCL kernel on %d region planes...\n", zRegionDim);
|
||||
@@ -412,26 +431,16 @@ int gpu_compute_cutoff_potential_lattice(
|
||||
clStatus = clSetKernelArg(clKernel,8,sizeof(int),&zRegionIndex);
|
||||
CHECK_ERROR("clSetKernelArg")
|
||||
|
||||
printf("Ok**!2\n");
|
||||
|
||||
clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,3,NULL,gridDim,blockDim,0,NULL,NULL);
|
||||
|
||||
printf("Ok**!2\n");
|
||||
|
||||
CHECK_ERROR("clEnqueueNDRangeKernel")
|
||||
|
||||
printf("Ok**!2\n");
|
||||
|
||||
clStatus = clFinish(clCommandQueue);
|
||||
|
||||
printf("Ok**!2\n");
|
||||
|
||||
CHECK_ERROR("clFinish")
|
||||
}
|
||||
|
||||
printf("Ok++!\n");
|
||||
|
||||
printf("Finished OpenCL kernel calls \n");
|
||||
printf("Finished OpenCL kernel calls\n");
|
||||
|
||||
/* copy result regions from OpenCL device */
|
||||
pb_SwitchToTimer(timers, pb_TimerID_COPY);
|
||||
|
||||
@@ -9,6 +9,10 @@
|
||||
#ifndef __GPUINFOH__
|
||||
#define __GPUINFOH__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void compute_active_thread(size_t *thread,
|
||||
size_t *grid,
|
||||
int task,
|
||||
@@ -17,4 +21,8 @@ void compute_active_thread(size_t *thread,
|
||||
int minor,
|
||||
int sm);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
Binary file not shown.
@@ -124,8 +124,6 @@ int main(int argc, char *argv[]) {
|
||||
pb_InitializeTimerSet(&timers);
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_IO);
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
{
|
||||
const char *pqrfilename = parameters->inpFiles[0];
|
||||
|
||||
@@ -136,8 +134,6 @@ int main(int argc, char *argv[]) {
|
||||
printf("read %d atoms from file '%s'\n", atom->size, pqrfilename);
|
||||
}
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
/* find extent of domain */
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
get_atom_extent(&min_ext, &max_ext, atom);
|
||||
|
||||
@@ -3,6 +3,10 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void clMemSet(cl_command_queue, cl_mem, int, size_t);
|
||||
char* readFile(const char*);
|
||||
|
||||
@@ -14,4 +18,8 @@ char* readFile(const char*);
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user