Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactory

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor udpate

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit c1e168fdbe
1309 changed files with 247412 additions and 311463 deletions

View File

@@ -104,8 +104,8 @@ static int initialize(int use_gpu) {
context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result);
// create command queue for the first device
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
if (!cmd_queue) {
cmd_queue = clCreateCommandQueue(context, device_list[0], 0, &result);
if (!cmd_queue || result != CL_SUCCESS) {
printf("ERROR: clCreateCommandQueue() failed\n");
return -1;
}
@@ -120,7 +120,7 @@ static int shutdown() {
if (context)
clReleaseContext(context);
if (device_list)
delete device_list;
delete [] device_list;
// reset all variables
cmd_queue = 0;
@@ -188,7 +188,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
fread(source + strlen(source), sourcesize, 1, fp);
fclose(fp);*/
// OpenCL initialization
// OpenCL initialization
int use_gpu = 1;
if (initialize(use_gpu))
return -1;
@@ -197,12 +197,25 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
cl_int err = 0;
//const char *slist[2] = {source, 0};
//cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err);
cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err);
uint8_t *kernel_bin = NULL;
size_t kernel_size;
cl_int binary_status = 0;
err = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateProgramWithSource() => %d\n", err);
printf("ERROR: read_kernel_file() => %d\n", err);
return -1;
}
err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
cl_program prog = clCreateProgramWithBinary(
context, 1, device_list, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateProgramWithBinary() => %d\n", err);
return -1;
}
free(kernel_bin);
err = clBuildProgram(prog, 1, &device_list[0], NULL, NULL, NULL);
{ // show warnings/errors
// static char log[65536]; memset(log, 0, sizeof(log));
// cl_device_id device_id = 0;
@@ -226,6 +239,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
printf("ERROR: clCreateKernel() 0 => %d\n", err);
return -1;
}
kernel2 = clCreateKernel(prog, kernel_swap, &err);
if (err != CL_SUCCESS) {
printf("ERROR: clCreateKernel() 0 => %d\n", err);
@@ -241,6 +255,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
n_points * n_features, err);
return -1;
}
d_feature_swap =
clCreateBuffer(context, CL_MEM_READ_WRITE,
n_points * n_features * sizeof(float), NULL, &err);
@@ -249,6 +264,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
n_points * n_features, err);
return -1;
}
d_cluster =
clCreateBuffer(context, CL_MEM_READ_WRITE,
n_clusters * n_features * sizeof(float), NULL, &err);
@@ -257,6 +273,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
n_clusters * n_features, err);
return -1;
}
d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE,
n_points * sizeof(int), NULL, &err);
if (err != CL_SUCCESS) {
@@ -296,6 +313,8 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature) {
}
membership_OCL = (int *)malloc(n_points * sizeof(int));
return 0;
}
void deallocateMemory() {