Merge branch 'master' into graphics

This commit is contained in:
Blaise Tine
2021-05-26 23:33:06 -07:00
120 changed files with 4269 additions and 2329 deletions

View File

@@ -233,7 +233,7 @@ free(allPlatforms);*/
//--cambine-4: Create an OpenCL command queue
oclHandles.queue = clCreateCommandQueue(
oclHandles.context, oclHandles.devices[DEVICE_ID_INUSED], 0, &resultCL);
printf("resultCL=%d, queue=0x%x\n", resultCL, oclHandles.queue);
//printf("resultCL=%d, queue=0x%x\n", resultCL, oclHandles.queue);
if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
@@ -383,8 +383,8 @@ void _clRelease() {
errorFlag = true;
}
oclHandles.kernel[nKernel] = NULL;
printf("clReleaseKernel()\n");
}
oclHandles.kernel.clear();
}
if (oclHandles.program != NULL) {
@@ -394,6 +394,7 @@ void _clRelease() {
errorFlag = true;
}
oclHandles.program = NULL;
printf("clReleaseProgram()\n");
}
if (oclHandles.queue != NULL) {
@@ -403,10 +404,9 @@ void _clRelease() {
errorFlag = true;
}
oclHandles.queue = NULL;
printf("clReleaseCommandQueue()\n");
}
free(oclHandles.devices);
if (oclHandles.context != NULL) {
cl_int resultCL = clReleaseContext(oclHandles.context);
if (resultCL != CL_SUCCESS) {
@@ -414,6 +414,17 @@ void _clRelease() {
errorFlag = true;
}
oclHandles.context = NULL;
printf("clReleaseContext()\n");
}
if (oclHandles.devices != NULL) {
cl_int resultCL = clReleaseDevice(oclHandles.devices[0]);
if (resultCL != CL_SUCCESS) {
cerr << "ReleaseCL()::Error: In clReleaseDevice" << endl;
errorFlag = true;
}
free(oclHandles.devices);
printf("clReleaseDevice()\n");
}
if (errorFlag)
@@ -675,7 +686,7 @@ void _clFinish() throw(string) {
void _clInvokeKernel(int kernel_id, int work_items,
int work_group_size) throw(string) {
cl_uint work_dim = WORK_DIM;
cl_event e[1];
//cl_event e[1];
if (work_items % work_group_size != 0) // process situations that work_items
// cannot be divided by work_group_size
work_items =
@@ -684,7 +695,7 @@ void _clInvokeKernel(int kernel_id, int work_items,
size_t global_work_size[] = {work_items, 1};
oclHandles.cl_status = clEnqueueNDRangeKernel(
oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
global_work_size, local_work_size, 0, 0, &(e[0]));
global_work_size, local_work_size, 0, 0, NULL);
#ifdef ERRMSG
oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
switch (oclHandles.cl_status) {
@@ -749,13 +760,13 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x,
cl_uint work_dim = WORK_DIM;
size_t local_work_size[] = {group_x, group_y};
size_t global_work_size[] = {range_x, range_y};
cl_event e[1];
//cl_event e[1];
/*if(work_items%work_group_size != 0) //process situations that work_items
cannot be divided by work_group_size
work_items = work_items + (work_group_size-(work_items%work_group_size));*/
oclHandles.cl_status = clEnqueueNDRangeKernel(
oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0,
global_work_size, local_work_size, 0, 0, &(e[0]));
global_work_size, local_work_size, 0, 0, NULL);
#ifdef ERRMSG
oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
switch (oclHandles.cl_status) {

View File

@@ -78,14 +78,15 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
char h_over;
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask,
d_graph_visited, d_cost, d_over;
try {
//--1 transfer data from host to device
_clInit();
d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes);
d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges);
d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask);
d_updating_graph_mask =
_clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
d_updating_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited);
d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost);
@@ -94,8 +95,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
_clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes);
_clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges);
_clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask);
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char),
h_updating_graph_mask);
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char), h_updating_graph_mask);
_clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited);
_clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost);
@@ -106,6 +106,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
kernel_timer.reset();
kernel_timer.start();
#endif
do {
h_over = false;
_clMemcpyH2D(d_over, sizeof(char), &h_over);
@@ -136,9 +137,8 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
_clMemcpyD2H(d_over, sizeof(char), &h_over);
} while (h_over);
} while (h_over);
_clFinish();
#ifdef PROFILING
kernel_timer.stop();
kernel_time = kernel_timer.getTimeInSeconds();

View File

@@ -60,10 +60,10 @@ void compare_results(const datatype *cpu_results, const datatype *gpu_results, c
}
}
if (passed){
std::cout << "--cambine:passed:-)" << endl;
std::cout << "--cambine: passed: -)" << endl;
}
else{
std::cout << "--cambine: failed:-(" << endl;
std::cout << "--cambine: failed :-(" << endl;
}
return ;
}

View File

@@ -69,7 +69,7 @@ static cl_uint numPlatforms;
//! All discoverable OpenCL devices (one pointer per platform)
static cl_device_id* devices = NULL;
static cl_uint* numDevices;
static cl_uint* numDevices = NULL;
//! The chosen OpenCL platform
static cl_platform_id platform = NULL;
@@ -88,7 +88,6 @@ static cl_command_queue commandQueueNoProf = NULL;
//! Global status of events
static bool eventsEnabled = false;
//-------------------------------------------------------
// Initialization and Cleanup
//-------------------------------------------------------
@@ -239,14 +238,34 @@ static bool eventsEnabled = false;
return context;
}*/
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (nullptr == filename || nullptr == data || 0 == size)
return -1;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return -1;
}
fseek(fp , 0 , SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return 0;
}
cl_context cl_init_context(int platform, int dev,int quiet) {
int printInfo=1;
if (platform >= 0 && dev >= 0) printInfo = 0;
cl_int status;
// Used to iterate through the platforms and devices, respectively
cl_uint numPlatforms;
cl_uint numDevices;
// These will hold the platform and device we select (can potentially be
// multiple, but we're just doing one for now)
// cl_platform_id platform = NULL;
@@ -376,23 +395,24 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
// Getting platform and device information
numPlatforms = 1;
numDevices = 1;
int platform_touse = 0;
int device_touse = 0;
platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices);
status = clGetPlatformIDs(1, platforms, NULL);
numDevices = (cl_uint*)malloc(sizeof(cl_uint)*numPlatforms);
numDevices[0] = 1;
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices[0]);
int platform_touse = 0;
int device_touse = 0;
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
cl_errChk(status, "Oops!", true);
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, devices, NULL);
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, numDevices[0], devices, NULL);
cl_errChk(status, "Oops!", true);
context = clCreateContext(NULL, 1, devices, NULL, NULL, &status);
context = clCreateContext(NULL, numDevices[0], devices, NULL, NULL, &status);
cl_errChk(status, "Oops!", true);
device=devices[device_touse];
#define PROFILING
#ifdef PROFILING
commandQueue = clCreateCommandQueue(context,
@@ -400,7 +420,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
#else
clCommandQueue = clCreateCommandQueue(clGPUContext,
commandQueue = clCreateCommandQueue(context,
devices[device_touse], NULL, &status);
#endif // PROFILING
@@ -413,22 +433,34 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
/*!
Release all resources that the user doesn't have access to.
*/
void cl_cleanup()
void cl_cleanup()
{
cl_int status;
// Free the command queue
if(commandQueue) {
clReleaseCommandQueue(commandQueue);
if (commandQueue) {
status = clReleaseCommandQueue(commandQueue);
cl_errChk(status, "Oops!", true);
printf("clReleaseCommandQueue()\n");
}
// Free the context
if(context) {
clReleaseContext(context);
if (context) {
status = clReleaseContext(context);
cl_errChk(status, "Oops!", true);
printf("clReleaseContext()\n");
}
for (int p = 0; p < numPlatforms; ++p) {
for (int d = 0; d < numDevices[p]; ++d) {
status = clReleaseDevice(devices[d]);
cl_errChk(status, "Oops!", true);
printf("clReleaseDevice()\n");
}
}
free(devices);
free(numDevices);
// Free the platforms
free(platforms);
}
@@ -443,6 +475,7 @@ void cl_freeKernel(cl_kernel kernel)
if(kernel != NULL) {
status = clReleaseKernel(kernel);
cl_errChk(status, "Releasing kernel object", true);
printf("clReleaseKernel()\n");
}
}
@@ -457,6 +490,7 @@ void cl_freeMem(cl_mem mem)
if(mem != NULL) {
status = clReleaseMemObject(mem);
cl_errChk(status, "Releasing mem object", true);
printf("clReleaseMemObject()\n");
}
}
@@ -471,6 +505,7 @@ void cl_freeProgram(cl_program program)
if(program != NULL) {
status = clReleaseProgram(program);
cl_errChk(status, "Releasing program object", true);
printf("clReleaseProgram()\n");
}
}
@@ -782,27 +817,6 @@ void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size)
cl_unmapBuffer(mem, ptr);
}
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (nullptr == filename || nullptr == data || 0 == size)
return -1;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return -1;
}
fseek(fp , 0 , SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return 0;
}
//-------------------------------------------------------
// Program and kernels
//-------------------------------------------------------
@@ -858,17 +872,17 @@ cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbos
fread(source, 1, size, fp);
source[size] = '\0';*/
// Create the program object
//cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status);
//cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "Fan1;Fan2", &status);
// read kernel binary from file
// read kernel binary from file
uint8_t *kernel_bin = NULL;
size_t kernel_size;
cl_int binary_status = 0;
status = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
cl_errChk(status, "read_kernel_file", true);
cl_int binary_status = 0;
int err = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
cl_errChk(err, "read_kernel_file", true);
// Create the program object
//cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status);
cl_program clProgramReturn = clCreateProgramWithBinary(
context, 1, &device, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &status);
context, 1, devices, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &status);
free(kernel_bin);
cl_errChk(status, "Creating program", true);
@@ -1440,4 +1454,4 @@ char* itoa_portable(int value, char* result, int base) {
}
return result;
}
}

View File

@@ -76,6 +76,9 @@ int main(int argc, char *argv[]) {
free(b);
free(finalVec);
// OpenClGaussianElimination(context,timing);
cl_cleanup();
printf("Passed!\n");
return 0;
}
@@ -142,7 +145,8 @@ void ForwardSub(cl_context context, float *a, float *b, float *m, int size,
writeTime += eventTime(writeEvent, command_queue);
clReleaseEvent(writeEvent);
error = clEnqueueWriteBuffer(command_queue, m_dev,
error = clEnqueueWriteBuffer(command_queue,
m_dev,
1, // change to 0 for nonblocking write
0, // offset
sizeof(float) * size * size, m, 0, NULL,
@@ -258,6 +262,13 @@ void ForwardSub(cl_context context, float *a, float *b, float *m, int size,
printf("%f\n\n", writeTime + kernelTime + readTime);
}
cl_freeMem(a_dev);
cl_freeMem(b_dev);
cl_freeMem(m_dev);
cl_freeKernel(fan1_kernel);
cl_freeKernel(fan2_kernel);
cl_freeProgram(gaussianElim_program);
}
float eventTime(cl_event event, cl_command_queue command_queue) {

View File

@@ -69,7 +69,7 @@ static cl_uint numPlatforms;
//! All discoverable OpenCL devices (one pointer per platform)
static cl_device_id* devices = NULL;
static cl_uint* numDevices;
static cl_uint* numDevices = NULL;
//! The chosen OpenCL platform
static cl_platform_id platform = NULL;
@@ -265,9 +265,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
if (platform >= 0 && dev >= 0) printInfo = 0;
cl_int status;
// Used to iterate through the platforms and devices, respectively
cl_uint numPlatforms;
cl_uint numDevices;
// These will hold the platform and device we select (can potentially be
// multiple, but we're just doing one for now)
// cl_platform_id platform = NULL;
@@ -397,23 +395,24 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
// Getting platform and device information
numPlatforms = 1;
numDevices = 1;
int platform_touse = 0;
int device_touse = 0;
platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices);
status = clGetPlatformIDs(1, platforms, NULL);
numDevices = (cl_uint*)malloc(sizeof(cl_uint)*numPlatforms);
numDevices[0] = 1;
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices[0]);
int platform_touse = 0;
int device_touse = 0;
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
cl_errChk(status, "Oops!", true);
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, devices, NULL);
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, numDevices[0], devices, NULL);
cl_errChk(status, "Oops!", true);
context = clCreateContext(NULL, 1, devices, NULL, NULL, &status);
context = clCreateContext(NULL, numDevices[0], devices, NULL, NULL, &status);
cl_errChk(status, "Oops!", true);
device=devices[device_touse];
#define PROFILING
#ifdef PROFILING
commandQueue = clCreateCommandQueue(context,
@@ -421,7 +420,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
#else
clCommandQueue = clCreateCommandQueue(clGPUContext,
commandQueue = clCreateCommandQueue(context,
devices[device_touse], NULL, &status);
#endif // PROFILING
@@ -434,22 +433,34 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
/*!
Release all resources that the user doesn't have access to.
*/
void cl_cleanup()
void cl_cleanup()
{
cl_int status;
// Free the command queue
if(commandQueue) {
clReleaseCommandQueue(commandQueue);
if (commandQueue) {
status = clReleaseCommandQueue(commandQueue);
cl_errChk(status, "Oops!", true);
printf("clReleaseCommandQueue()\n");
}
// Free the context
if(context) {
clReleaseContext(context);
if (context) {
status = clReleaseContext(context);
cl_errChk(status, "Oops!", true);
printf("clReleaseContext()\n");
}
for (int p = 0; p < numPlatforms; ++p) {
for (int d = 0; d < numDevices[p]; ++d) {
status = clReleaseDevice(devices[d]);
cl_errChk(status, "Oops!", true);
printf("clReleaseDevice()\n");
}
}
free(devices);
free(numDevices);
// Free the platforms
free(platforms);
}
@@ -464,6 +475,7 @@ void cl_freeKernel(cl_kernel kernel)
if(kernel != NULL) {
status = clReleaseKernel(kernel);
cl_errChk(status, "Releasing kernel object", true);
printf("clReleaseKernel()\n");
}
}
@@ -478,6 +490,7 @@ void cl_freeMem(cl_mem mem)
if(mem != NULL) {
status = clReleaseMemObject(mem);
cl_errChk(status, "Releasing mem object", true);
printf("clReleaseMemObject()\n");
}
}
@@ -492,6 +505,7 @@ void cl_freeProgram(cl_program program)
if(program != NULL) {
status = clReleaseProgram(program);
cl_errChk(status, "Releasing program object", true);
printf("clReleaseProgram()\n");
}
}

View File

@@ -49,25 +49,27 @@ int main(int argc, char *argv[]) {
printf("%s --> Distance=%f\n", records[i].recString, records[i].distance);
}
free(recordDistances);
cl_cleanup();
printf("Passed!\n");
return 0;
}
float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
std::vector<LatLong> &locations, float lat,
float lng, int timing) {
// 1. set up kernel
cl_kernel NN_kernel;
cl_int status;
// 1. set up kernel
cl_kernel NN_kernel;
cl_program cl_NN_program;
cl_NN_program = cl_compileProgram((char *)"nearestNeighbor_kernel.cl", NULL);
NN_kernel = clCreateKernel(cl_NN_program, "NearestNeighbor", &status);
status =
cl_errChk(status, (char *)"Error Creating Nearest Neighbor kernel", true);
if (status)
exit(1);
cl_errChk(status, (char *)"Error Creating Nearest Neighbor kernel", true);
// 2. set up memory on device and send ipts data to device
// copy ipts(1,2) to device
// also need to alloate memory for the distancePoints
@@ -78,9 +80,11 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
d_locations = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(LatLong) * numRecords, NULL, &error);
cl_errChk(error, "ERROR: clCreateBuffer() failed", true);
d_distances = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(float) * numRecords, NULL, &error);
cl_errChk(error, "ERROR: clCreateBuffer() failed", true);
cl_command_queue command_queue = cl_getCommandQueue();
cl_event writeEvent, kernelEvent, readEvent;
@@ -89,6 +93,7 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
0, // offset
sizeof(LatLong) * numRecords, &locations[0], 0,
NULL, &writeEvent);
cl_errChk(error, "ERROR: clEnqueueWriteBuffer() failed", true);
// 3. send arguments to device
cl_int argchk;
@@ -124,8 +129,10 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
&readEvent);
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
if (timing) {
clFinish(command_queue);
clFinish(command_queue);
if (timing) {
cl_ulong eventStart, eventEnd, totalTime = 0;
printf("# Records\tWrite(s) [size]\t\tKernel(s)\tRead(s) "
"[size]\t\tTotal(s)\n");
@@ -166,8 +173,14 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
printf("%f\n\n", (float)(totalTime / 1e9));
}
// 6. return finalized data and release buffers
clReleaseMemObject(d_locations);
clReleaseMemObject(d_distances);
clReleaseEvent(writeEvent);
clReleaseEvent(kernelEvent);
clReleaseEvent(readEvent);
cl_freeMem(d_locations);
cl_freeMem(d_distances);
cl_freeKernel(NN_kernel);
cl_freeProgram(cl_NN_program);
return distances;
}

View File

@@ -7,6 +7,8 @@ POCL_RT_PATH ?= /opt/pocl/runtime
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
OPTS ?= -n1024
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small"
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
@@ -33,19 +35,19 @@ $(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
run-fpga: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-asesim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-vlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-simx: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;

View File

@@ -29,11 +29,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <chrono>
//#define NUM_DATA 65536
#define NUM_DATA 1024
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
@@ -85,14 +83,18 @@ uint8_t *kernel_bin = NULL;
///
// Cleanup any created OpenCL resources
//
void Cleanup(cl_context context, cl_command_queue commandQueue,
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
for (int i = 0; i < 3; i++) {
void Cleanup(cl_device_id device_id, cl_context context, cl_command_queue commandQueue,
cl_program program, cl_kernel kernel, cl_mem memObjects[2]) {
if (kernel_bin)
free(kernel_bin);
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
for (int i = 0; i < 2; i++) {
if (memObjects[i] != 0)
clReleaseMemObject(memObjects[i]);
}
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
if (kernel != 0)
clReleaseKernel(kernel);
@@ -103,11 +105,40 @@ void Cleanup(cl_context context, cl_command_queue commandQueue,
if (context != 0)
clReleaseContext(context);
if (kernel_bin) free(kernel_bin);
if (device_id != 0)
clReleaseDevice(device_id);
}
int size = 1024;
static void show_usage() {
printf("Usage: [-n size] [-h: help]\n");
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "n:h?")) != -1) {
switch (c) {
case 'n':
size = atoi(optarg);
break;
case 'h':
case '?': {
show_usage();
exit(0);
} break;
default:
show_usage();
exit(-1);
}
}
printf("Workload size=%d\n", size);
}
int main(int argc, char **argv) {
printf("enter demo main\n");
// parse command arguments
parse_args(argc, argv);
cl_platform_id platform_id;
cl_device_id device_id;
@@ -126,7 +157,7 @@ int main(int argc, char **argv) {
context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
cl_command_queue queue;
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err));
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, NULL, &_err));
cl_kernel kernel = 0;
cl_mem memObjects[2] = {0, 0};
@@ -139,7 +170,7 @@ int main(int argc, char **argv) {
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err));
if (program == NULL) {
std::cerr << "Failed to write program binary" << std::endl;
Cleanup(context, queue, program, kernel, memObjects);
Cleanup(device_id, context, queue, program, kernel, memObjects);
return 1;
} else {
std::cout << "Read program from binary." << std::endl;
@@ -148,7 +179,7 @@ int main(int argc, char **argv) {
// Build program
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
size_t nbytes = sizeof(float) * NUM_DATA;
size_t nbytes = sizeof(float) * size;
printf("attempting to create input buffer\n");
cl_mem input_buffer;
@@ -175,13 +206,13 @@ int main(int argc, char **argv) {
printf("attempting to enqueue write buffer\n");
float* h_src = (float*)malloc(nbytes);
for (int i = 0; i < NUM_DATA; i++) {
for (int i = 0; i < size; i++) {
h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0;
}
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL));
free(h_src);
size_t global_work_size[] = {NUM_DATA/2, NUM_DATA/2};
size_t global_work_size[] = {size/2, size/2};
printf("attempting to enqueue kernel\n");
auto time_start = std::chrono::high_resolution_clock::now();
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
@@ -196,18 +227,13 @@ int main(int argc, char **argv) {
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL));
/*printf("Result:");
for (int i = 0; i < NUM_DATA; i++) {
for (int i = 0; i < size; i++) {
float data = h_dst[i];
printf(" %f", data);
}*/
free(h_dst);
CL_CHECK(clReleaseMemObject(memObjects[0]));
CL_CHECK(clReleaseMemObject(memObjects[1]));
CL_CHECK(clReleaseKernel(kernel));
CL_CHECK(clReleaseProgram(program));
CL_CHECK(clReleaseContext(context));
Cleanup(device_id, context, queue, program, kernel, memObjects);
return 0;
}

View File

@@ -7,6 +7,8 @@ POCL_RT_PATH ?= /opt/pocl/runtime
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
OPTS ?= -n16
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small"
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
@@ -33,19 +35,19 @@ $(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
run-fpga: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-asesim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-vlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-simx: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;

View File

@@ -35,8 +35,6 @@
#include <unistd.h>
#include <chrono>
#define NUM_DATA (16+2)
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
@@ -159,14 +157,18 @@ float poclu_cl_half_to_float(cl_half value) {
///
// Cleanup any created OpenCL resources
//
void Cleanup(cl_context context, cl_command_queue commandQueue,
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
for (int i = 0; i < 3; i++) {
void Cleanup(cl_device_id device_id, cl_context context, cl_command_queue commandQueue,
cl_program program, cl_kernel kernel, cl_mem memObjects[2]) {
if (kernel_bin)
free(kernel_bin);
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
for (int i = 0; i < 2; i++) {
if (memObjects[i] != 0)
clReleaseMemObject(memObjects[i]);
}
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
if (kernel != 0)
clReleaseKernel(kernel);
@@ -177,11 +179,40 @@ void Cleanup(cl_context context, cl_command_queue commandQueue,
if (context != 0)
clReleaseContext(context);
if (kernel_bin) free(kernel_bin);
if (device_id != 0)
clReleaseDevice(device_id);
}
int size = 16+2;
static void show_usage() {
printf("Usage: [-n size] [-h: help]\n");
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "n:h?")) != -1) {
switch (c) {
case 'n':
size = atoi(optarg)+2;
break;
case 'h':
case '?': {
show_usage();
exit(0);
} break;
default:
show_usage();
exit(-1);
}
}
printf("Workload size=%d\n", size);
}
int main(int argc, char **argv) {
printf("enter demo main\n");
// parse command arguments
parse_args(argc, argv);
cl_platform_id platform_id;
cl_device_id device_id;
@@ -213,7 +244,7 @@ int main(int argc, char **argv) {
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &_err));
if (program == NULL) {
std::cerr << "Failed to write program binary" << std::endl;
Cleanup(context, queue, program, kernel, memObjects);
Cleanup(device_id, context, queue, program, kernel, memObjects);
return 1;
} else {
std::cout << "Read program from binary." << std::endl;
@@ -222,7 +253,7 @@ int main(int argc, char **argv) {
// Build program
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
size_t nbytes = sizeof(float) * NUM_DATA * NUM_DATA;
size_t nbytes = sizeof(float) * size * size;
printf("attempting to create input buffer\n");
cl_mem input_buffer;
@@ -235,7 +266,7 @@ int main(int argc, char **argv) {
memObjects[0] = input_buffer;
memObjects[1] = output_buffer;
long long ldc = NUM_DATA;
long long ldc = size;
float m0 = 1.0;
float m1 = 1.0;
@@ -265,15 +296,15 @@ int main(int argc, char **argv) {
printf("attempting to enqueue write buffer\n");
float* h_src = (float*)malloc(nbytes);
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
for (int i = 0; i < size * size; i++) {
h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0;
}
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL));
free(h_src);
size_t global_offset[2] = {1, 1};
size_t global_work_size[2] = {NUM_DATA - 2, NUM_DATA - 2}; // avoid the edges
const size_t local_work_size[2] = {NUM_DATA - 2, 1};
size_t global_work_size[2] = {size - 2, size - 2}; // avoid the edges
const size_t local_work_size[2] = {size - 2, 1};
printf("attempting to enqueue kernel\n");
auto time_start = std::chrono::high_resolution_clock::now();
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset,
@@ -286,20 +317,15 @@ int main(int argc, char **argv) {
printf("Download destination buffer\n");
float* h_dst = (float*)malloc(nbytes);
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL));
/*printf("Result:");
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
for (int i = 0; i < size; i++) {
float data = h_dst[i];
printf(" %f", data);
}*/
free(h_dst);
CL_CHECK(clReleaseMemObject(memObjects[0]));
CL_CHECK(clReleaseMemObject(memObjects[1]));
CL_CHECK(clReleaseKernel(kernel));
CL_CHECK(clReleaseProgram(program));
CL_CHECK(clReleaseContext(context));
Cleanup(device_id, context, queue, program, kernel, memObjects);
return 0;
}

View File

@@ -129,6 +129,8 @@ static void parse_args(int argc, char **argv) {
printf("Error: invalid size!\n");
exit(-1);
}
printf("Workload size=%d\n", size);
}
int main (int argc, char **argv) {
@@ -218,7 +220,8 @@ int main (int argc, char **argv) {
matmul(h_ref, h_a, h_b, size, size, size);
for (int i = 0; i < (size * size); i++) {
if (!almost_equal(h_c[i], h_ref[i])) {
printf("*** error: [%d] expected=%f, actual=%f\n", i, h_ref[i], h_c[i]);
if (errors < 100)
printf("*** error: [%d] expected=%f, actual=%f\n", i, h_ref[i], h_c[i]);
++errors;
}
}

View File

@@ -112,6 +112,8 @@ static void parse_args(int argc, char **argv) {
exit(-1);
}
}
printf("Workload size=%d\n", size);
}
int main (int argc, char **argv) {
@@ -196,7 +198,8 @@ int main (int argc, char **argv) {
for (int i = 0; i < size; ++i) {
float ref = h_a[i] + h_b[i];
if (!almost_equal(h_c[i], ref)) {
printf("*** error: [%d] expected=%f, actual=%f, a=%f, b=%f\n", i, ref, h_c[i], h_a[i], h_b[i]);
if (errors < 100)
printf("*** error: [%d] expected=%f, actual=%f, a=%f, b=%f\n", i, ref, h_c[i], h_a[i], h_b[i]);
++errors;
}
}