From e4d7a3deb464051317d0aaab276b14c36b65830b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 15 Nov 2020 10:28:32 -0800 Subject: [PATCH] constant integration updates --- benchmarks/opencl/sfilter/main.cc | 58 ++++++++++++------------------- ci/blackbox.sh | 2 +- 2 files changed, 24 insertions(+), 36 deletions(-) diff --git a/benchmarks/opencl/sfilter/main.cc b/benchmarks/opencl/sfilter/main.cc index f5fe5f54..98c644ca 100644 --- a/benchmarks/opencl/sfilter/main.cc +++ b/benchmarks/opencl/sfilter/main.cc @@ -33,8 +33,9 @@ #include #include #include +#include -#define NUM_DATA 16 +#define NUM_DATA (16+2) #define CL_CHECK(_expr) \ do { \ @@ -222,19 +223,15 @@ int main(int argc, char **argv) { // Build program CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); + size_t nbytes = sizeof(float) * NUM_DATA * NUM_DATA; + printf("attempting to create input buffer\n"); - fflush(stdout); cl_mem input_buffer; - input_buffer = CL_CHECK_ERR( - clCreateBuffer(context, CL_MEM_READ_ONLY, - sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err)); + input_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err)); printf("attempting to create output buffer\n"); - fflush(stdout); cl_mem output_buffer; - output_buffer = CL_CHECK_ERR( - clCreateBuffer(context, CL_MEM_WRITE_ONLY, - sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err)); + output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err)); memObjects[0] = input_buffer; memObjects[1] = output_buffer; @@ -252,10 +249,8 @@ int main(int argc, char **argv) { float m8 = 1.0; printf("attempting to create kernel\n"); - fflush(stdout); kernel = CL_CHECK_ERR(clCreateKernel(program, "sfilter", &_err)); printf("setting up kernel args cl_mem:%lx \n", input_buffer); - fflush(stdout); CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer)); CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer)); CL_CHECK(clSetKernelArg(kernel, 2, sizeof(ldc), (&ldc))); @@ -270,40 +265,33 @@ int main(int argc, char **argv) { CL_CHECK(clSetKernelArg(kernel, 11, sizeof(m8), (&m8))); printf("attempting to enqueue write buffer\n"); - fflush(stdout); + + float* h_src = (float*)malloc(nbytes); for (int i = 0; i < NUM_DATA * NUM_DATA; i++) { - float in = ((float)rand() / (float)(RAND_MAX)) * 100.0; - CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, - i * sizeof(float), 4, &in, 0, NULL, NULL)); + h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0; } + CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL)); + free(h_src); - cl_event kernel_completion; size_t global_offset[2] = {1, 1}; size_t global_work_size[2] = {NUM_DATA - 2, NUM_DATA - 2}; // avoid the edges - const size_t local_work_size[2] = {64, 1}; + const size_t local_work_size[2] = {NUM_DATA - 2, 1}; printf("attempting to enqueue kernel\n"); - fflush(stdout); + auto time_start = std::chrono::high_resolution_clock::now(); CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset, - global_work_size, local_work_size, 0, NULL, - &kernel_completion)); - printf("Enqueue'd kernel\n"); - fflush(stdout); - cl_ulong time_start, time_end; - CL_CHECK(clWaitForEvents(1, &kernel_completion)); - CL_CHECK(clGetEventProfilingInfo(kernel_completion, - CL_PROFILING_COMMAND_START, - sizeof(time_start), &time_start, NULL)); - CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END, - sizeof(time_end), &time_end, NULL)); - double elapsed = time_end - time_start; - printf("time(ns):%lg\n", elapsed); - CL_CHECK(clReleaseEvent(kernel_completion)); + global_work_size, local_work_size, 0, NULL, NULL)); + CL_CHECK(clFinish(queue)); + auto time_end = std::chrono::high_resolution_clock::now(); + double elapsed = std::chrono::duration_cast(time_end - time_start).count(); + printf("Elapsed time: %lg ms\n", elapsed); + + printf("Download destination buffer\n"); + float* h_dst = (float*)malloc(nbytes); + CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL)); printf("Result:"); for (int i = 0; i < NUM_DATA * NUM_DATA; i++) { - float data; - CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, - i * sizeof(float), 4, &data, 0, NULL, NULL)); + float data = h_dst[i]; // printf(" %f", data); } printf("\n"); diff --git a/ci/blackbox.sh b/ci/blackbox.sh index dc5fb10e..c84496dc 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -18,7 +18,7 @@ CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1" make -C driver/opae/vlsim make -C driver/tests/dogfood run-vlsim make -C benchmarks/opencl/sgemm run-vlsim -# test L3 and 2 L2's with 4 cores each +# test L3 and 2xL2's with 4 cores each make -C driver/opae/vlsim clean CONFIGS="-DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1" make -C driver/opae/vlsim make -C driver/tests/dogfood run-vlsim