updates to opencl benchmarks
This commit is contained in:
@@ -7,6 +7,8 @@ POCL_RT_PATH ?= /opt/pocl/runtime
|
|||||||
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
||||||
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
||||||
|
|
||||||
|
OPTS ?= -n1024
|
||||||
|
|
||||||
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small"
|
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small"
|
||||||
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
|
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
|
||||||
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
|
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
|
||||||
@@ -33,19 +35,19 @@ $(PROJECT): $(SRCS)
|
|||||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||||
|
|
||||||
run-fpga: $(PROJECT) kernel.pocl
|
run-fpga: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
run-asesim: $(PROJECT) kernel.pocl
|
run-asesim: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
run-vlsim: $(PROJECT) kernel.pocl
|
run-vlsim: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
run-simx: $(PROJECT) kernel.pocl
|
run-simx: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
run-rtlsim: $(PROJECT) kernel.pocl
|
run-rtlsim: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
.depend: $(SRCS)
|
.depend: $(SRCS)
|
||||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||||
|
|||||||
@@ -29,11 +29,9 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
|
||||||
//#define NUM_DATA 65536
|
|
||||||
#define NUM_DATA 1024
|
|
||||||
|
|
||||||
#define CL_CHECK(_expr) \
|
#define CL_CHECK(_expr) \
|
||||||
do { \
|
do { \
|
||||||
cl_int _err = _expr; \
|
cl_int _err = _expr; \
|
||||||
@@ -85,14 +83,18 @@ uint8_t *kernel_bin = NULL;
|
|||||||
///
|
///
|
||||||
// Cleanup any created OpenCL resources
|
// Cleanup any created OpenCL resources
|
||||||
//
|
//
|
||||||
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
void Cleanup(cl_device_id device_id, cl_context context, cl_command_queue commandQueue,
|
||||||
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
|
cl_program program, cl_kernel kernel, cl_mem memObjects[2]) {
|
||||||
for (int i = 0; i < 3; i++) {
|
if (kernel_bin)
|
||||||
|
free(kernel_bin);
|
||||||
|
|
||||||
|
if (commandQueue != 0)
|
||||||
|
clReleaseCommandQueue(commandQueue);
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
if (memObjects[i] != 0)
|
if (memObjects[i] != 0)
|
||||||
clReleaseMemObject(memObjects[i]);
|
clReleaseMemObject(memObjects[i]);
|
||||||
}
|
}
|
||||||
if (commandQueue != 0)
|
|
||||||
clReleaseCommandQueue(commandQueue);
|
|
||||||
|
|
||||||
if (kernel != 0)
|
if (kernel != 0)
|
||||||
clReleaseKernel(kernel);
|
clReleaseKernel(kernel);
|
||||||
@@ -103,11 +105,40 @@ void Cleanup(cl_context context, cl_command_queue commandQueue,
|
|||||||
if (context != 0)
|
if (context != 0)
|
||||||
clReleaseContext(context);
|
clReleaseContext(context);
|
||||||
|
|
||||||
if (kernel_bin) free(kernel_bin);
|
if (device_id != 0)
|
||||||
|
clReleaseDevice(device_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
int size = 1024;
|
||||||
|
|
||||||
|
static void show_usage() {
|
||||||
|
printf("Usage: [-n size] [-h: help]\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void parse_args(int argc, char **argv) {
|
||||||
|
int c;
|
||||||
|
while ((c = getopt(argc, argv, "n:h?")) != -1) {
|
||||||
|
switch (c) {
|
||||||
|
case 'n':
|
||||||
|
size = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'h':
|
||||||
|
case '?': {
|
||||||
|
show_usage();
|
||||||
|
exit(0);
|
||||||
|
} break;
|
||||||
|
default:
|
||||||
|
show_usage();
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Workload size=%d\n", size);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
printf("enter demo main\n");
|
// parse command arguments
|
||||||
|
parse_args(argc, argv);
|
||||||
|
|
||||||
cl_platform_id platform_id;
|
cl_platform_id platform_id;
|
||||||
cl_device_id device_id;
|
cl_device_id device_id;
|
||||||
@@ -139,7 +170,7 @@ int main(int argc, char **argv) {
|
|||||||
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err));
|
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err));
|
||||||
if (program == NULL) {
|
if (program == NULL) {
|
||||||
std::cerr << "Failed to write program binary" << std::endl;
|
std::cerr << "Failed to write program binary" << std::endl;
|
||||||
Cleanup(context, queue, program, kernel, memObjects);
|
Cleanup(device_id, context, queue, program, kernel, memObjects);
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
std::cout << "Read program from binary." << std::endl;
|
std::cout << "Read program from binary." << std::endl;
|
||||||
@@ -148,7 +179,7 @@ int main(int argc, char **argv) {
|
|||||||
// Build program
|
// Build program
|
||||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||||
|
|
||||||
size_t nbytes = sizeof(float) * NUM_DATA;
|
size_t nbytes = sizeof(float) * size;
|
||||||
|
|
||||||
printf("attempting to create input buffer\n");
|
printf("attempting to create input buffer\n");
|
||||||
cl_mem input_buffer;
|
cl_mem input_buffer;
|
||||||
@@ -175,13 +206,13 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
printf("attempting to enqueue write buffer\n");
|
printf("attempting to enqueue write buffer\n");
|
||||||
float* h_src = (float*)malloc(nbytes);
|
float* h_src = (float*)malloc(nbytes);
|
||||||
for (int i = 0; i < NUM_DATA; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||||
}
|
}
|
||||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL));
|
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL));
|
||||||
free(h_src);
|
free(h_src);
|
||||||
|
|
||||||
size_t global_work_size[] = {NUM_DATA/2, NUM_DATA/2};
|
size_t global_work_size[] = {size/2, size/2};
|
||||||
printf("attempting to enqueue kernel\n");
|
printf("attempting to enqueue kernel\n");
|
||||||
auto time_start = std::chrono::high_resolution_clock::now();
|
auto time_start = std::chrono::high_resolution_clock::now();
|
||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
|
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
|
||||||
@@ -196,18 +227,13 @@ int main(int argc, char **argv) {
|
|||||||
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL));
|
||||||
|
|
||||||
/*printf("Result:");
|
/*printf("Result:");
|
||||||
for (int i = 0; i < NUM_DATA; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
float data = h_dst[i];
|
float data = h_dst[i];
|
||||||
printf(" %f", data);
|
printf(" %f", data);
|
||||||
}*/
|
}*/
|
||||||
free(h_dst);
|
free(h_dst);
|
||||||
|
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
Cleanup(device_id, context, queue, program, kernel, memObjects);
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
|
||||||
|
|
||||||
CL_CHECK(clReleaseKernel(kernel));
|
|
||||||
CL_CHECK(clReleaseProgram(program));
|
|
||||||
CL_CHECK(clReleaseContext(context));
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ POCL_RT_PATH ?= /opt/pocl/runtime
|
|||||||
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
|
||||||
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
||||||
|
|
||||||
|
OPTS ?= -n16
|
||||||
|
|
||||||
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small"
|
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small"
|
||||||
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
|
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
|
||||||
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
|
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
|
||||||
@@ -33,19 +35,19 @@ $(PROJECT): $(SRCS)
|
|||||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||||
|
|
||||||
run-fpga: $(PROJECT) kernel.pocl
|
run-fpga: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
run-asesim: $(PROJECT) kernel.pocl
|
run-asesim: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
run-vlsim: $(PROJECT) kernel.pocl
|
run-vlsim: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
run-simx: $(PROJECT) kernel.pocl
|
run-simx: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
run-rtlsim: $(PROJECT) kernel.pocl
|
run-rtlsim: $(PROJECT) kernel.pocl
|
||||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||||
|
|
||||||
.depend: $(SRCS)
|
.depend: $(SRCS)
|
||||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||||
|
|||||||
@@ -35,8 +35,6 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
|
||||||
#define NUM_DATA (16+2)
|
|
||||||
|
|
||||||
#define CL_CHECK(_expr) \
|
#define CL_CHECK(_expr) \
|
||||||
do { \
|
do { \
|
||||||
cl_int _err = _expr; \
|
cl_int _err = _expr; \
|
||||||
@@ -159,14 +157,18 @@ float poclu_cl_half_to_float(cl_half value) {
|
|||||||
///
|
///
|
||||||
// Cleanup any created OpenCL resources
|
// Cleanup any created OpenCL resources
|
||||||
//
|
//
|
||||||
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
void Cleanup(cl_device_id device_id, cl_context context, cl_command_queue commandQueue,
|
||||||
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
|
cl_program program, cl_kernel kernel, cl_mem memObjects[2]) {
|
||||||
for (int i = 0; i < 3; i++) {
|
if (kernel_bin)
|
||||||
|
free(kernel_bin);
|
||||||
|
|
||||||
|
if (commandQueue != 0)
|
||||||
|
clReleaseCommandQueue(commandQueue);
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
if (memObjects[i] != 0)
|
if (memObjects[i] != 0)
|
||||||
clReleaseMemObject(memObjects[i]);
|
clReleaseMemObject(memObjects[i]);
|
||||||
}
|
}
|
||||||
if (commandQueue != 0)
|
|
||||||
clReleaseCommandQueue(commandQueue);
|
|
||||||
|
|
||||||
if (kernel != 0)
|
if (kernel != 0)
|
||||||
clReleaseKernel(kernel);
|
clReleaseKernel(kernel);
|
||||||
@@ -177,11 +179,40 @@ void Cleanup(cl_context context, cl_command_queue commandQueue,
|
|||||||
if (context != 0)
|
if (context != 0)
|
||||||
clReleaseContext(context);
|
clReleaseContext(context);
|
||||||
|
|
||||||
if (kernel_bin) free(kernel_bin);
|
if (device_id != 0)
|
||||||
|
clReleaseDevice(device_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
int size = 16+2;
|
||||||
|
|
||||||
|
static void show_usage() {
|
||||||
|
printf("Usage: [-n size] [-h: help]\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void parse_args(int argc, char **argv) {
|
||||||
|
int c;
|
||||||
|
while ((c = getopt(argc, argv, "n:h?")) != -1) {
|
||||||
|
switch (c) {
|
||||||
|
case 'n':
|
||||||
|
size = atoi(optarg)+2;
|
||||||
|
break;
|
||||||
|
case 'h':
|
||||||
|
case '?': {
|
||||||
|
show_usage();
|
||||||
|
exit(0);
|
||||||
|
} break;
|
||||||
|
default:
|
||||||
|
show_usage();
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Workload size=%d\n", size);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
printf("enter demo main\n");
|
// parse command arguments
|
||||||
|
parse_args(argc, argv);
|
||||||
|
|
||||||
cl_platform_id platform_id;
|
cl_platform_id platform_id;
|
||||||
cl_device_id device_id;
|
cl_device_id device_id;
|
||||||
@@ -213,7 +244,7 @@ int main(int argc, char **argv) {
|
|||||||
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err));
|
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err));
|
||||||
if (program == NULL) {
|
if (program == NULL) {
|
||||||
std::cerr << "Failed to write program binary" << std::endl;
|
std::cerr << "Failed to write program binary" << std::endl;
|
||||||
Cleanup(context, queue, program, kernel, memObjects);
|
Cleanup(device_id, context, queue, program, kernel, memObjects);
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
std::cout << "Read program from binary." << std::endl;
|
std::cout << "Read program from binary." << std::endl;
|
||||||
@@ -222,7 +253,7 @@ int main(int argc, char **argv) {
|
|||||||
// Build program
|
// Build program
|
||||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||||
|
|
||||||
size_t nbytes = sizeof(float) * NUM_DATA * NUM_DATA;
|
size_t nbytes = sizeof(float) * size * size;
|
||||||
|
|
||||||
printf("attempting to create input buffer\n");
|
printf("attempting to create input buffer\n");
|
||||||
cl_mem input_buffer;
|
cl_mem input_buffer;
|
||||||
@@ -235,7 +266,7 @@ int main(int argc, char **argv) {
|
|||||||
memObjects[0] = input_buffer;
|
memObjects[0] = input_buffer;
|
||||||
memObjects[1] = output_buffer;
|
memObjects[1] = output_buffer;
|
||||||
|
|
||||||
long long ldc = NUM_DATA;
|
long long ldc = size;
|
||||||
|
|
||||||
float m0 = 1.0;
|
float m0 = 1.0;
|
||||||
float m1 = 1.0;
|
float m1 = 1.0;
|
||||||
@@ -265,15 +296,15 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
printf("attempting to enqueue write buffer\n");
|
printf("attempting to enqueue write buffer\n");
|
||||||
float* h_src = (float*)malloc(nbytes);
|
float* h_src = (float*)malloc(nbytes);
|
||||||
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
for (int i = 0; i < size * size; i++) {
|
||||||
h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||||
}
|
}
|
||||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL));
|
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL));
|
||||||
free(h_src);
|
free(h_src);
|
||||||
|
|
||||||
size_t global_offset[2] = {1, 1};
|
size_t global_offset[2] = {1, 1};
|
||||||
size_t global_work_size[2] = {NUM_DATA - 2, NUM_DATA - 2}; // avoid the edges
|
size_t global_work_size[2] = {size - 2, size - 2}; // avoid the edges
|
||||||
const size_t local_work_size[2] = {NUM_DATA - 2, 1};
|
const size_t local_work_size[2] = {size - 2, 1};
|
||||||
printf("attempting to enqueue kernel\n");
|
printf("attempting to enqueue kernel\n");
|
||||||
auto time_start = std::chrono::high_resolution_clock::now();
|
auto time_start = std::chrono::high_resolution_clock::now();
|
||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset,
|
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset,
|
||||||
@@ -288,18 +319,13 @@ int main(int argc, char **argv) {
|
|||||||
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL));
|
||||||
|
|
||||||
/*printf("Result:");
|
/*printf("Result:");
|
||||||
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
float data = h_dst[i];
|
float data = h_dst[i];
|
||||||
printf(" %f", data);
|
printf(" %f", data);
|
||||||
}*/
|
}*/
|
||||||
free(h_dst);
|
free(h_dst);
|
||||||
|
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
Cleanup(device_id, context, queue, program, kernel, memObjects);
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
|
||||||
|
|
||||||
CL_CHECK(clReleaseKernel(kernel));
|
|
||||||
CL_CHECK(clReleaseProgram(program));
|
|
||||||
CL_CHECK(clReleaseContext(context));
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -129,6 +129,8 @@ static void parse_args(int argc, char **argv) {
|
|||||||
printf("Error: invalid size!\n");
|
printf("Error: invalid size!\n");
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
printf("Workload size=%d\n", size);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main (int argc, char **argv) {
|
int main (int argc, char **argv) {
|
||||||
|
|||||||
@@ -112,6 +112,8 @@ static void parse_args(int argc, char **argv) {
|
|||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
printf("Workload size=%d\n", size);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main (int argc, char **argv) {
|
int main (int argc, char **argv) {
|
||||||
|
|||||||
Reference in New Issue
Block a user