constant integration updates
This commit is contained in:
@@ -33,8 +33,9 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
#define NUM_DATA 16
|
#define NUM_DATA (16+2)
|
||||||
|
|
||||||
#define CL_CHECK(_expr) \
|
#define CL_CHECK(_expr) \
|
||||||
do { \
|
do { \
|
||||||
@@ -222,19 +223,15 @@ int main(int argc, char **argv) {
|
|||||||
// Build program
|
// Build program
|
||||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||||
|
|
||||||
|
size_t nbytes = sizeof(float) * NUM_DATA * NUM_DATA;
|
||||||
|
|
||||||
printf("attempting to create input buffer\n");
|
printf("attempting to create input buffer\n");
|
||||||
fflush(stdout);
|
|
||||||
cl_mem input_buffer;
|
cl_mem input_buffer;
|
||||||
input_buffer = CL_CHECK_ERR(
|
input_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||||
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
|
||||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
|
||||||
|
|
||||||
printf("attempting to create output buffer\n");
|
printf("attempting to create output buffer\n");
|
||||||
fflush(stdout);
|
|
||||||
cl_mem output_buffer;
|
cl_mem output_buffer;
|
||||||
output_buffer = CL_CHECK_ERR(
|
output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
|
||||||
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
|
||||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
|
||||||
|
|
||||||
memObjects[0] = input_buffer;
|
memObjects[0] = input_buffer;
|
||||||
memObjects[1] = output_buffer;
|
memObjects[1] = output_buffer;
|
||||||
@@ -252,10 +249,8 @@ int main(int argc, char **argv) {
|
|||||||
float m8 = 1.0;
|
float m8 = 1.0;
|
||||||
|
|
||||||
printf("attempting to create kernel\n");
|
printf("attempting to create kernel\n");
|
||||||
fflush(stdout);
|
|
||||||
kernel = CL_CHECK_ERR(clCreateKernel(program, "sfilter", &_err));
|
kernel = CL_CHECK_ERR(clCreateKernel(program, "sfilter", &_err));
|
||||||
printf("setting up kernel args cl_mem:%lx \n", input_buffer);
|
printf("setting up kernel args cl_mem:%lx \n", input_buffer);
|
||||||
fflush(stdout);
|
|
||||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
|
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
|
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(ldc), (&ldc)));
|
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(ldc), (&ldc)));
|
||||||
@@ -270,40 +265,33 @@ int main(int argc, char **argv) {
|
|||||||
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(m8), (&m8)));
|
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(m8), (&m8)));
|
||||||
|
|
||||||
printf("attempting to enqueue write buffer\n");
|
printf("attempting to enqueue write buffer\n");
|
||||||
fflush(stdout);
|
|
||||||
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
|
||||||
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
|
||||||
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE,
|
|
||||||
i * sizeof(float), 4, &in, 0, NULL, NULL));
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_event kernel_completion;
|
float* h_src = (float*)malloc(nbytes);
|
||||||
|
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||||
|
h_src[i] = ((float)rand() / (float)(RAND_MAX)) * 100.0;
|
||||||
|
}
|
||||||
|
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, nbytes, h_src, 0, NULL, NULL));
|
||||||
|
free(h_src);
|
||||||
|
|
||||||
size_t global_offset[2] = {1, 1};
|
size_t global_offset[2] = {1, 1};
|
||||||
size_t global_work_size[2] = {NUM_DATA - 2, NUM_DATA - 2}; // avoid the edges
|
size_t global_work_size[2] = {NUM_DATA - 2, NUM_DATA - 2}; // avoid the edges
|
||||||
const size_t local_work_size[2] = {64, 1};
|
const size_t local_work_size[2] = {NUM_DATA - 2, 1};
|
||||||
printf("attempting to enqueue kernel\n");
|
printf("attempting to enqueue kernel\n");
|
||||||
fflush(stdout);
|
auto time_start = std::chrono::high_resolution_clock::now();
|
||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset,
|
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset,
|
||||||
global_work_size, local_work_size, 0, NULL,
|
global_work_size, local_work_size, 0, NULL, NULL));
|
||||||
&kernel_completion));
|
CL_CHECK(clFinish(queue));
|
||||||
printf("Enqueue'd kernel\n");
|
auto time_end = std::chrono::high_resolution_clock::now();
|
||||||
fflush(stdout);
|
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||||
cl_ulong time_start, time_end;
|
printf("Elapsed time: %lg ms\n", elapsed);
|
||||||
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
|
||||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
|
printf("Download destination buffer\n");
|
||||||
CL_PROFILING_COMMAND_START,
|
float* h_dst = (float*)malloc(nbytes);
|
||||||
sizeof(time_start), &time_start, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, nbytes, h_dst, 0, NULL, NULL));
|
||||||
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
|
|
||||||
sizeof(time_end), &time_end, NULL));
|
|
||||||
double elapsed = time_end - time_start;
|
|
||||||
printf("time(ns):%lg\n", elapsed);
|
|
||||||
CL_CHECK(clReleaseEvent(kernel_completion));
|
|
||||||
|
|
||||||
printf("Result:");
|
printf("Result:");
|
||||||
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
|
||||||
float data;
|
float data = h_dst[i];
|
||||||
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
|
|
||||||
i * sizeof(float), 4, &data, 0, NULL, NULL));
|
|
||||||
// printf(" %f", data);
|
// printf(" %f", data);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1" make -C driver/opae/vlsim
|
|||||||
make -C driver/tests/dogfood run-vlsim
|
make -C driver/tests/dogfood run-vlsim
|
||||||
make -C benchmarks/opencl/sgemm run-vlsim
|
make -C benchmarks/opencl/sgemm run-vlsim
|
||||||
|
|
||||||
# test L3 and 2 L2's with 4 cores each
|
# test L3 and 2xL2's with 4 cores each
|
||||||
make -C driver/opae/vlsim clean
|
make -C driver/opae/vlsim clean
|
||||||
CONFIGS="-DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1" make -C driver/opae/vlsim
|
CONFIGS="-DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1" make -C driver/opae/vlsim
|
||||||
make -C driver/tests/dogfood run-vlsim
|
make -C driver/tests/dogfood run-vlsim
|
||||||
|
|||||||
Reference in New Issue
Block a user