Vortex 2.0 changes:

+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactoring minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit c1e168fdbe
1309 changed files with 247412 additions and 311463 deletions
--- a/tests/opencl/cutcp/cutoff.c
+++ b/tests/opencl/cutcp/cutoff.c
@@ -19,6 +19,27 @@
 #include "macros.h"
 #include "ocl.h"

+/*
+ * Load the whole of `filename` into a freshly malloc'd buffer.
+ *
+ * On success returns CL_SUCCESS, stores the buffer in *data and its length
+ * in bytes in *size; the caller is responsible for free()ing *data.
+ * On failure (NULL argument, file cannot be opened or measured, allocation
+ * failure, or short read) returns CL_INVALID_VALUE and leaves *data and
+ * *size unmodified.
+ */
+static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
+  FILE* fp;
+  long fsize;
+  uint8_t* buffer;
+  size_t nread;
+
+  if (NULL == filename || NULL == data || NULL == size)
+    return CL_INVALID_VALUE;
+
+  // The kernel image is raw bytes, so open in binary mode to avoid any
+  // text-mode newline translation.
+  fp = fopen(filename, "rb");
+  if (NULL == fp) {
+    fprintf(stderr, "Failed to load kernel.");
+    return CL_INVALID_VALUE;
+  }
+
+  // Measure the file by seeking to its end; both calls can fail.
+  if (fseek(fp, 0, SEEK_END) != 0 || (fsize = ftell(fp)) < 0) {
+    fclose(fp);
+    return CL_INVALID_VALUE;
+  }
+  rewind(fp);
+
+  // malloc(0) may legally return NULL, so always request at least one byte.
+  buffer = (uint8_t*)malloc(fsize > 0 ? (size_t)fsize : 1);
+  if (NULL == buffer) {
+    fclose(fp);
+    return CL_INVALID_VALUE;
+  }
+
+  nread = fread(buffer, 1, (size_t)fsize, fp);
+  fclose(fp);
+
+  // A short read means the buffer does not hold the full image; do not
+  // hand a partial binary back to the caller.
+  if (nread != (size_t)fsize) {
+    free(buffer);
+    return CL_INVALID_VALUE;
+  }
+
+  *data = buffer;
+  *size = nread;
+  return CL_SUCCESS;
+}
+
 // OpenCL 1.1 support for int3 is not uniform on all implementations, so
 // we use int4 instead.  Only the 'x', 'y', and 'z' fields of xyz are used.
 typedef cl_int4 xyz;
@@ -294,8 +315,6 @@ int gpu_compute_cutoff_potential_lattice(
    printf("\n");
  }

-  printf("Ok!\n");
-
  pb_Context* pb_context;
  pb_context = pb_InitOpenCLContext(parameters);
  if (pb_context == NULL) {
@@ -303,8 +322,6 @@ int gpu_compute_cutoff_potential_lattice(
    return -1;
  }

-  printf("Ok!\n");
-
  cl_int clStatus;
  cl_device_id clDevice = (cl_device_id) pb_context->clDeviceId;
  cl_platform_id clPlatform = (cl_platform_id) pb_context->clPlatformId;
@@ -317,8 +334,13 @@ int gpu_compute_cutoff_potential_lattice(
  
  //const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
  //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
-  cl_program clProgram = clCreateProgramWithBuiltInKernels(
-      clContext, 1, &clDevice, "opencl_cutoff_potential_lattice", &clStatus);
+  uint8_t *kernel_bin = NULL;
+  size_t kernel_size;
+  cl_int binary_status = 0;  
+  clStatus = read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size);
+  CHECK_ERROR("read_kernel_file")  
+	cl_program clProgram = clCreateProgramWithBinary(
+      clContext, 1, &clDevice, &kernel_size, (const uint8_t**)&kernel_bin, &binary_status, &clStatus);
  CHECK_ERROR("clCreateProgramWithSource")

  char clOptions[50];
@@ -399,9 +421,6 @@ int gpu_compute_cutoff_potential_lattice(
  clStatus = clSetKernelArg(clKernel,10,sizeof(cl_mem),&NbrList);
  CHECK_ERROR("clSetKernelArg")

-  printf("Ok!!\n");
-
-
  /* loop over z-dimension, invoke OpenCL kernel for each x-y plane */
  pb_SwitchToTimer(timers, pb_TimerID_KERNEL);
  printf("Invoking OpenCL kernel on %d region planes...\n", zRegionDim);
@@ -412,26 +431,16 @@ int gpu_compute_cutoff_potential_lattice(
    clStatus = clSetKernelArg(clKernel,8,sizeof(int),&zRegionIndex);
    CHECK_ERROR("clSetKernelArg")

-    printf("Ok**!2\n");
-
    clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,3,NULL,gridDim,blockDim,0,NULL,NULL);

-    printf("Ok**!2\n");
-
    CHECK_ERROR("clEnqueueNDRangeKernel")

-    printf("Ok**!2\n");
-
    clStatus = clFinish(clCommandQueue);

-    printf("Ok**!2\n");
-
    CHECK_ERROR("clFinish")
  }

-  printf("Ok++!\n");
-
-  printf("Finished OpenCL kernel calls                        \n");
+  printf("Finished OpenCL kernel calls\n");

  /* copy result regions from OpenCL device */
  pb_SwitchToTimer(timers, pb_TimerID_COPY);