Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
This commit is contained in:
@@ -1,67 +1,7 @@
|
||||
XLEN ?= 32
|
||||
|
||||
LLVM_PREFIX ?= /opt/llvm-riscv
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
|
||||
SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
|
||||
POCL_CC_PATH ?= /opt/pocl/compiler
|
||||
POCL_RT_PATH ?= /opt/pocl/runtime
|
||||
|
||||
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
||||
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
||||
|
||||
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -mattr=+vortex -float-abi=hard -code-model=small"
|
||||
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -Xclang -target-feature -Xclang +vortex -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
|
||||
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link$(XLEN).ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I$(POCL_RT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
PROJECT = kmeans
|
||||
|
||||
SRCS = main.cc read_input.c rmse.c kmeans_clustering.c cluster.c getopt.c
|
||||
|
||||
all: $(PROJECT) kernel.pocl
|
||||
OPTS ?=
|
||||
|
||||
kernel.pocl: kernel.cl
|
||||
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -LLCFLAGS $(K_LLCFLAGS) -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/fpga:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-asesim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/asesim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-vlsim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-rtlsim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o .depend
|
||||
|
||||
clean-all: clean
|
||||
rm -rf *.pocl *.dump
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
||||
include ../common.mk
|
||||
|
||||
Binary file not shown.
@@ -170,6 +170,7 @@ float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */
|
||||
free(new_centers[0]);
|
||||
free(new_centers);
|
||||
free(new_centers_len);
|
||||
free(initial);
|
||||
|
||||
return clusters;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -104,8 +104,8 @@ static int initialize(int use_gpu) {
|
||||
context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result);
|
||||
|
||||
// create command queue for the first device
|
||||
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
|
||||
if (!cmd_queue) {
|
||||
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, &result);
|
||||
if (!cmd_queue || result != CL_SUCCESS) {
|
||||
printf("ERROR: clCreateCommandQueue() failed\n");
|
||||
return -1;
|
||||
}
|
||||
@@ -120,7 +120,7 @@ static int shutdown() {
|
||||
if (context)
|
||||
clReleaseContext(context);
|
||||
if (device_list)
|
||||
delete device_list;
|
||||
delete [] device_list;
|
||||
|
||||
// reset all variables
|
||||
cmd_queue = 0;
|
||||
@@ -188,7 +188,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
fread(source + strlen(source), sourcesize, 1, fp);
|
||||
fclose(fp);*/
|
||||
|
||||
// OpenCL initialization
|
||||
// OpenCL initialization
|
||||
int use_gpu = 1;
|
||||
if (initialize(use_gpu))
|
||||
return -1;
|
||||
@@ -197,12 +197,25 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
cl_int err = 0;
|
||||
//const char *slist[2] = {source, 0};
|
||||
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
|
||||
cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
|
||||
uint8_t *kernel_bin = NULL;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
err = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
|
||||
printf("ERROR: read_kernel_file() => %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
|
||||
|
||||
cl_program prog = clCreateProgramWithBinary(
|
||||
context, 1, device_list, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("ERROR: clCreateProgramWithBinary() => %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(kernel_bin);
|
||||
|
||||
err = clBuildProgram(prog, 1, &device_list[0], NULL, NULL, NULL);
|
||||
{ // show warnings/errors
|
||||
// static char log[65536]; memset(log, 0, sizeof(log));
|
||||
// cl_device_id device_id = 0;
|
||||
@@ -226,6 +239,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
printf("ERROR: clCreateKernel() 0 => %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel2 = clCreateKernel(prog, kernel_swap, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("ERROR: clCreateKernel() 0 => %d\n", err);
|
||||
@@ -241,6 +255,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
n_points * n_features, err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
d_feature_swap =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
n_points * n_features * sizeof(float), NULL, &err);
|
||||
@@ -249,6 +264,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
n_points * n_features, err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
d_cluster =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
n_clusters * n_features * sizeof(float), NULL, &err);
|
||||
@@ -257,6 +273,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
n_clusters * n_features, err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
n_points * sizeof(int), NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
@@ -296,6 +313,8 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
}
|
||||
|
||||
membership_OCL = (int *)malloc(n_points * sizeof(int));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void deallocateMemory() {
|
||||
|
||||
@@ -331,7 +331,9 @@ int setup(int argc, char **argv) {
|
||||
}
|
||||
}
|
||||
|
||||
/* free up memory */
|
||||
/* free up memory */
|
||||
free(cluster_centres[0]);
|
||||
free(cluster_centres);
|
||||
free(features[0]);
|
||||
free(features);
|
||||
return (0);
|
||||
|
||||
Reference in New Issue
Block a user