diff --git a/benchmarks/opencl/saxpy/Makefile b/benchmarks/opencl/saxpy/Makefile index 9d6f91b6..0414fcff 100644 --- a/benchmarks/opencl/saxpy/Makefile +++ b/benchmarks/opencl/saxpy/Makefile @@ -7,6 +7,8 @@ POCL_RT_PATH ?= /opt/pocl/runtime VORTEX_DRV_PATH ?= $(realpath ../../../driver) VORTEX_RT_PATH ?= $(realpath ../../../runtime) +OPTS ?= -n1024 + K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small" K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections" K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm" @@ -33,19 +35,19 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-asesim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-vlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) .depend: $(SRCS) $(CXX) $(CXXFLAGS) -MM $^ > .depend; diff --git a/benchmarks/opencl/saxpy/main.cc b/benchmarks/opencl/saxpy/main.cc index 7d64f03d..d4076d70 100644 --- a/benchmarks/opencl/saxpy/main.cc +++ b/benchmarks/opencl/saxpy/main.cc @@ -29,11 +29,9 @@ #include #include #include +#include #include -//#define NUM_DATA 65536 -#define NUM_DATA 1024 - #define CL_CHECK(_expr) \ do { \ cl_int _err = _expr; \ @@ -85,14 +83,18 @@ uint8_t *kernel_bin = NULL; /// // Cleanup any created OpenCL resources // -void Cleanup(cl_context context, cl_command_queue commandQueue, - cl_program program, cl_kernel kernel, cl_mem memObjects[3]) { - for (int i = 0; i < 3; i++) { +void Cleanup(cl_device_id device_id, cl_context context, cl_command_queue commandQueue, + cl_program program, cl_kernel kernel, cl_mem memObjects[2]) { + if (kernel_bin) + free(kernel_bin); + + if (commandQueue != 0) + clReleaseCommandQueue(commandQueue); + + for (int i = 0; i < 2; i++) { if (memObjects[i] != 0) clReleaseMemObject(memObjects[i]); } - if (commandQueue != 0) - clReleaseCommandQueue(commandQueue); if (kernel != 0) clReleaseKernel(kernel); @@ -103,11 +105,40 @@ void Cleanup(cl_context context, cl_command_queue commandQueue, if (context != 0) clReleaseContext(context); - if (kernel_bin) free(kernel_bin); + if (device_id != 0) + clReleaseDevice(device_id); +} + +int size = 1024; + +static void show_usage() { + printf("Usage: [-n size] [-h: help]\n"); +} + +static void parse_args(int argc, char **argv) { + int c; + while ((c = getopt(argc, argv, "n:h?")) != -1) { + switch (c) { + case 'n': + size = atoi(optarg); + break; + case 'h': + case '?': { + show_usage(); + exit(0); + } break; + default: + show_usage(); + exit(-1); + } + } + + printf("Workload size=%d\n", size); } int main(int argc, char **argv) { - printf("enter demo main\n"); + // parse command arguments + parse_args(argc, argv); cl_platform_id platform_id; cl_device_id device_id; @@ -139,7 +170,7 @@ int main(int argc, char **argv) { context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err)); if (program == NULL) { std::cerr << "Failed to write program binary" << std::endl; - Cleanup(context, queue, program, kernel, memObjects); + Cleanup(device_id, context, queue, program, kernel, memObjects); return 1; } else { std::cout << "Read program from binary." << std::endl; @@ -148,7 +179,7 @@ int main(int argc, char **argv) { // Build program CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); - size_t nbytes = sizeof(float) * NUM_DATA; + size_t nbytes = sizeof(float) * size; printf("attempting to create input buffer\n"); cl_mem input_buffer; @@ -175,13 +206,13 @@ int main(int argc, char **argv) { printf("attempting to enqueue write buffer\n"); float* h_src = (float*)malloc(nbytes); - for (int i = 0; i < NUM_DATA; i++) { + for (int i = 0; i < size; i++) { h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0; } CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL)); free(h_src); - size_t global_work_size[] = {NUM_DATA/2, NUM_DATA/2}; + size_t global_work_size[] = {size/2, size/2}; printf("attempting to enqueue kernel\n"); auto time_start = std::chrono::high_resolution_clock::now(); CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, @@ -196,18 +227,13 @@ int main(int argc, char **argv) { CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL)); /*printf("Result:"); - for (int i = 0; i < NUM_DATA; i++) { + for (int i = 0; i < size; i++) { float data = h_dst[i]; printf(" %f", data); }*/ free(h_dst); - CL_CHECK(clReleaseMemObject(memObjects[0])); - CL_CHECK(clReleaseMemObject(memObjects[1])); - - CL_CHECK(clReleaseKernel(kernel)); - CL_CHECK(clReleaseProgram(program)); - CL_CHECK(clReleaseContext(context)); + Cleanup(device_id, context, queue, program, kernel, memObjects); return 0; } diff --git a/benchmarks/opencl/sfilter/Makefile b/benchmarks/opencl/sfilter/Makefile index 62099e37..6a22e827 100644 --- a/benchmarks/opencl/sfilter/Makefile +++ b/benchmarks/opencl/sfilter/Makefile @@ -7,6 +7,8 @@ POCL_RT_PATH ?= /opt/pocl/runtime VORTEX_DRV_PATH ?= $(realpath ../../../driver) VORTEX_RT_PATH ?= $(realpath ../../../runtime) +OPTS ?= -n16 + K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small" K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections" K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm" @@ -33,19 +35,19 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ run-fpga: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-asesim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-vlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-simx: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) kernel.pocl - LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) .depend: $(SRCS) $(CXX) $(CXXFLAGS) -MM $^ > .depend; diff --git a/benchmarks/opencl/sfilter/main.cc b/benchmarks/opencl/sfilter/main.cc index 2f6992d1..17f4cea0 100644 --- a/benchmarks/opencl/sfilter/main.cc +++ b/benchmarks/opencl/sfilter/main.cc @@ -35,8 +35,6 @@ #include #include -#define NUM_DATA (16+2) - #define CL_CHECK(_expr) \ do { \ cl_int _err = _expr; \ @@ -159,14 +157,18 @@ float poclu_cl_half_to_float(cl_half value) { /// // Cleanup any created OpenCL resources // -void Cleanup(cl_context context, cl_command_queue commandQueue, - cl_program program, cl_kernel kernel, cl_mem memObjects[3]) { - for (int i = 0; i < 3; i++) { +void Cleanup(cl_device_id device_id, cl_context context, cl_command_queue commandQueue, + cl_program program, cl_kernel kernel, cl_mem memObjects[2]) { + if (kernel_bin) + free(kernel_bin); + + if (commandQueue != 0) + clReleaseCommandQueue(commandQueue); + + for (int i = 0; i < 2; i++) { if (memObjects[i] != 0) clReleaseMemObject(memObjects[i]); } - if (commandQueue != 0) - clReleaseCommandQueue(commandQueue); if (kernel != 0) clReleaseKernel(kernel); @@ -177,11 +179,40 @@ void Cleanup(cl_context context, cl_command_queue commandQueue, if (context != 0) clReleaseContext(context); - if (kernel_bin) free(kernel_bin); + if (device_id != 0) + clReleaseDevice(device_id); +} + +int size = 16+2; + +static void show_usage() { + printf("Usage: [-n size] [-h: help]\n"); +} + +static void parse_args(int argc, char **argv) { + int c; + while ((c = getopt(argc, argv, "n:h?")) != -1) { + switch (c) { + case 'n': + size = atoi(optarg)+2; + break; + case 'h': + case '?': { + show_usage(); + exit(0); + } break; + default: + show_usage(); + exit(-1); + } + } + + printf("Workload size=%d\n", size); } int main(int argc, char **argv) { - printf("enter demo main\n"); + // parse command arguments + parse_args(argc, argv); cl_platform_id platform_id; cl_device_id device_id; @@ -213,7 +244,7 @@ int main(int argc, char **argv) { context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err)); if (program == NULL) { std::cerr << "Failed to write program binary" << std::endl; - Cleanup(context, queue, program, kernel, memObjects); + Cleanup(device_id, context, queue, program, kernel, memObjects); return 1; } else { std::cout << "Read program from binary." << std::endl; @@ -222,7 +253,7 @@ int main(int argc, char **argv) { // Build program CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); - size_t nbytes = sizeof(float) * NUM_DATA * NUM_DATA; + size_t nbytes = sizeof(float) * size * size; printf("attempting to create input buffer\n"); cl_mem input_buffer; @@ -235,7 +266,7 @@ int main(int argc, char **argv) { memObjects[0] = input_buffer; memObjects[1] = output_buffer; - long long ldc = NUM_DATA; + long long ldc = size; float m0 = 1.0; float m1 = 1.0; @@ -265,15 +296,15 @@ int main(int argc, char **argv) { printf("attempting to enqueue write buffer\n"); float* h_src = (float*)malloc(nbytes); - for (int i = 0; i < NUM_DATA * NUM_DATA; i++) { + for (int i = 0; i < size * size; i++) { h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0; } CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL)); free(h_src); size_t global_offset[2] = {1, 1}; - size_t global_work_size[2] = {NUM_DATA - 2, NUM_DATA - 2}; // avoid the edges - const size_t local_work_size[2] = {NUM_DATA - 2, 1}; + size_t global_work_size[2] = {size - 2, size - 2}; // avoid the edges + const size_t local_work_size[2] = {size - 2, 1}; printf("attempting to enqueue kernel\n"); auto time_start = std::chrono::high_resolution_clock::now(); CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset, @@ -286,20 +317,15 @@ int main(int argc, char **argv) { printf("Download destination buffer\n"); float* h_dst = (float*)malloc(nbytes); CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL)); - + /*printf("Result:"); - for (int i = 0; i < NUM_DATA * NUM_DATA; i++) { + for (int i = 0; i < size; i++) { float data = h_dst[i]; printf(" %f", data); }*/ free(h_dst); - CL_CHECK(clReleaseMemObject(memObjects[0])); - CL_CHECK(clReleaseMemObject(memObjects[1])); - - CL_CHECK(clReleaseKernel(kernel)); - CL_CHECK(clReleaseProgram(program)); - CL_CHECK(clReleaseContext(context)); + Cleanup(device_id, context, queue, program, kernel, memObjects); return 0; } diff --git a/benchmarks/opencl/sgemm/main.cc b/benchmarks/opencl/sgemm/main.cc index 1f92a14a..f61a8727 100644 --- a/benchmarks/opencl/sgemm/main.cc +++ b/benchmarks/opencl/sgemm/main.cc @@ -129,6 +129,8 @@ static void parse_args(int argc, char **argv) { printf("Error: invalid size!\n"); exit(-1); } + + printf("Workload size=%d\n", size); } int main (int argc, char **argv) { diff --git a/benchmarks/opencl/vecadd/main.cc b/benchmarks/opencl/vecadd/main.cc index 14f35877..5b8c3a83 100644 --- a/benchmarks/opencl/vecadd/main.cc +++ b/benchmarks/opencl/vecadd/main.cc @@ -112,6 +112,8 @@ static void parse_args(int argc, char **argv) { exit(-1); } } + + printf("Workload size=%d\n", size); } int main (int argc, char **argv) {