Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
This commit is contained in:
@@ -104,8 +104,8 @@ static int initialize(int use_gpu) {
|
||||
context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result);
|
||||
|
||||
// create command queue for the first device
|
||||
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
|
||||
if (!cmd_queue) {
|
||||
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, &result);
|
||||
if (!cmd_queue || result != CL_SUCCESS) {
|
||||
printf("ERROR: clCreateCommandQueue() failed\n");
|
||||
return -1;
|
||||
}
|
||||
@@ -120,7 +120,7 @@ static int shutdown() {
|
||||
if (context)
|
||||
clReleaseContext(context);
|
||||
if (device_list)
|
||||
delete device_list;
|
||||
delete [] device_list;
|
||||
|
||||
// reset all variables
|
||||
cmd_queue = 0;
|
||||
@@ -188,7 +188,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
fread(source + strlen(source), sourcesize, 1, fp);
|
||||
fclose(fp);*/
|
||||
|
||||
// OpenCL initialization
|
||||
// OpenCL initialization
|
||||
int use_gpu = 1;
|
||||
if (initialize(use_gpu))
|
||||
return -1;
|
||||
@@ -197,12 +197,25 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
cl_int err = 0;
|
||||
//const char *slist[2] = {source, 0};
|
||||
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
|
||||
cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
|
||||
uint8_t *kernel_bin = NULL;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
err = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
|
||||
printf("ERROR: read_kernel_file() => %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
|
||||
|
||||
cl_program prog = clCreateProgramWithBinary(
|
||||
context, 1, device_list, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("ERROR: clCreateProgramWithBinary() => %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(kernel_bin);
|
||||
|
||||
err = clBuildProgram(prog, 1, &device_list[0], NULL, NULL, NULL);
|
||||
{ // show warnings/errors
|
||||
// static char log[65536]; memset(log, 0, sizeof(log));
|
||||
// cl_device_id device_id = 0;
|
||||
@@ -226,6 +239,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
printf("ERROR: clCreateKernel() 0 => %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel2 = clCreateKernel(prog, kernel_swap, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
printf("ERROR: clCreateKernel() 0 => %d\n", err);
|
||||
@@ -241,6 +255,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
n_points * n_features, err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
d_feature_swap =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
n_points * n_features * sizeof(float), NULL, &err);
|
||||
@@ -249,6 +264,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
n_points * n_features, err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
d_cluster =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
n_clusters * n_features * sizeof(float), NULL, &err);
|
||||
@@ -257,6 +273,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
n_clusters * n_features, err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
n_points * sizeof(int), NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
@@ -296,6 +313,8 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
|
||||
}
|
||||
|
||||
membership_OCL = (int *)malloc(n_points * sizeof(int));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void deallocateMemory() {
|
||||
|
||||
Reference in New Issue
Block a user