Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -34,25 +34,27 @@
|
|||||||
|
|
||||||
#define NUM_DATA 64
|
#define NUM_DATA 64
|
||||||
|
|
||||||
#define CL_CHECK(_expr) \
|
#define CL_CHECK(_expr) \
|
||||||
do { \
|
do { \
|
||||||
cl_int _err = _expr; \
|
cl_int _err = _expr; \
|
||||||
if (_err == CL_SUCCESS) \
|
if (_err == CL_SUCCESS) \
|
||||||
break; \
|
break; \
|
||||||
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||||
abort(); \
|
cleanup(); \
|
||||||
} while (0)
|
exit(-1); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#define CL_CHECK_ERR(_expr) \
|
#define CL_CHECK2(_expr) \
|
||||||
({ \
|
({ \
|
||||||
cl_int _err = CL_INVALID_VALUE; \
|
cl_int _err = CL_INVALID_VALUE; \
|
||||||
decltype(_expr) _ret = _expr; \
|
decltype(_expr) _ret = _expr; \
|
||||||
if (_err != CL_SUCCESS) { \
|
if (_err != CL_SUCCESS) { \
|
||||||
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||||
abort(); \
|
cleanup(); \
|
||||||
} \
|
exit(-1); \
|
||||||
_ret; \
|
} \
|
||||||
})
|
_ret; \
|
||||||
|
})
|
||||||
|
|
||||||
void pfn_notify(const char *errinfo, const void *private_info, size_t cb,
|
void pfn_notify(const char *errinfo, const void *private_info, size_t cb,
|
||||||
void *user_data) {
|
void *user_data) {
|
||||||
@@ -80,28 +82,26 @@ static int read_kernel_file(const char* filename, uint8_t** data, size_t* size)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cl_device_id device_id = NULL;
|
||||||
uint8_t *kernel_bin = NULL;
|
uint8_t *kernel_bin = NULL;
|
||||||
|
cl_context context = 0;
|
||||||
|
cl_kernel kernel = 0;
|
||||||
|
cl_command_queue queue = 0;
|
||||||
|
cl_program program = 0;
|
||||||
|
cl_mem memObjects[3] = {0, 0, 0};
|
||||||
|
|
||||||
///
|
///
|
||||||
// Cleanup any created OpenCL resources
|
// Cleanup any created OpenCL resources
|
||||||
//
|
//
|
||||||
void Cleanup(cl_context context, cl_command_queue commandQueue,
|
void cleanup() {
|
||||||
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
|
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
if (memObjects[i] != 0)
|
if (memObjects[i]) clReleaseMemObject(memObjects[i]);
|
||||||
clReleaseMemObject(memObjects[i]);
|
|
||||||
}
|
}
|
||||||
if (commandQueue != 0)
|
if (queue) clReleaseCommandQueue(queue);
|
||||||
clReleaseCommandQueue(commandQueue);
|
if (kernel) clReleaseKernel(kernel);
|
||||||
|
if (program) clReleaseProgram(program);
|
||||||
if (kernel != 0)
|
if (context) clReleaseContext(context);
|
||||||
clReleaseKernel(kernel);
|
if (device_id) clReleaseDevice(device_id);
|
||||||
|
|
||||||
if (program != 0)
|
|
||||||
clReleaseProgram(program);
|
|
||||||
|
|
||||||
if (context != 0)
|
|
||||||
clReleaseContext(context);
|
|
||||||
|
|
||||||
if (kernel_bin) free(kernel_bin);
|
if (kernel_bin) free(kernel_bin);
|
||||||
}
|
}
|
||||||
@@ -110,7 +110,6 @@ int main(int argc, char **argv) {
|
|||||||
printf("enter demo main\n");
|
printf("enter demo main\n");
|
||||||
|
|
||||||
cl_platform_id platform_id;
|
cl_platform_id platform_id;
|
||||||
cl_device_id device_id;
|
|
||||||
size_t kernel_size;
|
size_t kernel_size;
|
||||||
cl_int binary_status = 0;
|
cl_int binary_status = 0;
|
||||||
int i;
|
int i;
|
||||||
@@ -123,31 +122,23 @@ int main(int argc, char **argv) {
|
|||||||
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||||
|
|
||||||
cl_context context;
|
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
|
||||||
context = CL_CHECK_ERR(
|
|
||||||
clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
|
|
||||||
|
|
||||||
cl_command_queue queue;
|
queue = CL_CHECK2(clCreateCommandQueue(context, device_id,
|
||||||
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id,
|
|
||||||
CL_QUEUE_PROFILING_ENABLE, &_err));
|
CL_QUEUE_PROFILING_ENABLE, &_err));
|
||||||
|
|
||||||
cl_kernel kernel = 0;
|
|
||||||
cl_mem memObjects[3] = {0, 0, 0};
|
|
||||||
|
|
||||||
// Create OpenCL program - first attempt to load cached binary.
|
// Create OpenCL program - first attempt to load cached binary.
|
||||||
// If that is not available, then create the program from source
|
// If that is not available, then create the program from source
|
||||||
// and store the binary for future use.
|
// and store the binary for future use.
|
||||||
std::cout << "Attempting to create program from binary..." << std::endl;
|
std::cout << "Attempting to create program from binary..." << std::endl;
|
||||||
// cl_program program = CreateProgramFromBinary(context, device_id,
|
// cl_program program = CreateProgramFromBinary(context, device_id,
|
||||||
// "kernel.cl.bin");
|
// "kernel.cl.bin");
|
||||||
cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary(
|
program = CL_CHECK2(clCreateProgramWithBinary(
|
||||||
context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err));
|
context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err));
|
||||||
if (program == NULL) {
|
if (program == NULL) {
|
||||||
std::cerr << "Failed to write program binary" << std::endl;
|
printf("clCreateProgramWithBinary() failed\n");
|
||||||
Cleanup(context, queue, program, kernel, memObjects);
|
cleanup();
|
||||||
return 1;
|
return -1;
|
||||||
} else {
|
|
||||||
std::cout << "Read program from binary." << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build program
|
// Build program
|
||||||
@@ -156,19 +147,19 @@ int main(int argc, char **argv) {
|
|||||||
printf("attempting to create input buffer\n");
|
printf("attempting to create input buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_mem input_bufferA;
|
cl_mem input_bufferA;
|
||||||
input_bufferA = CL_CHECK_ERR(
|
input_bufferA = CL_CHECK2(
|
||||||
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
cl_mem input_bufferB;
|
cl_mem input_bufferB;
|
||||||
input_bufferB = CL_CHECK_ERR(
|
input_bufferB = CL_CHECK2(
|
||||||
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
printf("attempting to create output buffer\n");
|
printf("attempting to create output buffer\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_mem output_buffer;
|
cl_mem output_buffer;
|
||||||
output_buffer = CL_CHECK_ERR(
|
output_buffer = CL_CHECK2(
|
||||||
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||||
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
|
||||||
|
|
||||||
@@ -180,7 +171,7 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
printf("attempting to create kernel\n");
|
printf("attempting to create kernel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
kernel = CL_CHECK_ERR(clCreateKernel(program, "sgemm", &_err));
|
kernel = CL_CHECK2(clCreateKernel(program, "sgemm", &_err));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA));
|
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB));
|
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB));
|
||||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer));
|
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer));
|
||||||
@@ -209,7 +200,7 @@ int main(int argc, char **argv) {
|
|||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size,
|
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size,
|
||||||
local_work_size, 0, NULL,
|
local_work_size, 0, NULL,
|
||||||
&kernel_completion));
|
&kernel_completion));
|
||||||
printf("Enqueue'd kerenel\n");
|
printf("Enqueue'd kernel\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
cl_ulong time_start, time_end;
|
cl_ulong time_start, time_end;
|
||||||
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
CL_CHECK(clWaitForEvents(1, &kernel_completion));
|
||||||
@@ -231,13 +222,8 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[0]));
|
// Clean up
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[1]));
|
cleanup();
|
||||||
CL_CHECK(clReleaseMemObject(memObjects[2]));
|
|
||||||
|
|
||||||
CL_CHECK(clReleaseKernel(kernel));
|
|
||||||
CL_CHECK(clReleaseProgram(program));
|
|
||||||
CL_CHECK(clReleaseContext(context));
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
@@ -31,6 +31,7 @@
|
|||||||
})
|
})
|
||||||
|
|
||||||
int exitcode = 0;
|
int exitcode = 0;
|
||||||
|
cl_device_id device_id = NULL;
|
||||||
cl_context context = NULL;
|
cl_context context = NULL;
|
||||||
cl_command_queue commandQueue = NULL;
|
cl_command_queue commandQueue = NULL;
|
||||||
cl_program program = NULL;
|
cl_program program = NULL;
|
||||||
@@ -72,6 +73,8 @@ static void cleanup() {
|
|||||||
if (b_memobj) clReleaseMemObject(b_memobj);
|
if (b_memobj) clReleaseMemObject(b_memobj);
|
||||||
if (c_memobj) clReleaseMemObject(c_memobj);
|
if (c_memobj) clReleaseMemObject(c_memobj);
|
||||||
if (context) clReleaseContext(context);
|
if (context) clReleaseContext(context);
|
||||||
|
if (device_id) clReleaseDevice(device_id);
|
||||||
|
|
||||||
if (kernel_bin) free(kernel_bin);
|
if (kernel_bin) free(kernel_bin);
|
||||||
if (A) free(A);
|
if (A) free(A);
|
||||||
if (B) free(B);
|
if (B) free(B);
|
||||||
@@ -104,7 +107,6 @@ int main (int argc, char **argv) {
|
|||||||
printf("enter demo main\n");
|
printf("enter demo main\n");
|
||||||
|
|
||||||
cl_platform_id platform_id;
|
cl_platform_id platform_id;
|
||||||
cl_device_id device_id;
|
|
||||||
size_t kernel_size;
|
size_t kernel_size;
|
||||||
cl_int binary_status = 0;
|
cl_int binary_status = 0;
|
||||||
int i;
|
int i;
|
||||||
@@ -139,6 +141,11 @@ int main (int argc, char **argv) {
|
|||||||
// Create program from kernel source
|
// Create program from kernel source
|
||||||
program = CL_CHECK2(clCreateProgramWithBinary(
|
program = CL_CHECK2(clCreateProgramWithBinary(
|
||||||
context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err));
|
context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err));
|
||||||
|
if (program == NULL) {
|
||||||
|
printf("clCreateProgramWithBinary() failed\n");
|
||||||
|
cleanup();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
// Build program
|
// Build program
|
||||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||||
|
|||||||
@@ -91,22 +91,22 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs) {
|
extern int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs) {
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
unsigned value;
|
unsigned value;
|
||||||
|
|
||||||
if (cycles) {
|
if (cycles) {
|
||||||
ret |= vx_csr_get(device, 0, CSR_CYCLE_H, &value);
|
ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value);
|
||||||
*cycles = value;
|
*cycles = value;
|
||||||
ret |= vx_csr_get(device, 0, CSR_CYCLE, &value);
|
ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value);
|
||||||
*cycles = (*cycles << 32) | value;
|
*cycles = (*cycles << 32) | value;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (instrs) {
|
if (instrs) {
|
||||||
ret |= vx_csr_get(device, 0, CSR_INSTRET_H, &value);
|
ret |= vx_csr_get(device, core_id, CSR_INSTRET_H, &value);
|
||||||
*instrs = value;
|
*instrs = value;
|
||||||
ret |= vx_csr_get(device, 0, CSR_INSTRET, &value);
|
ret |= vx_csr_get(device, core_id, CSR_INSTRET, &value);
|
||||||
*instrs = (*instrs << 32) | value;
|
*instrs = (*instrs << 32) | value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -58,10 +58,10 @@ int vx_start(vx_device_h hdevice);
|
|||||||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||||
|
|
||||||
// set device constant registers
|
// set device constant registers
|
||||||
int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value);
|
int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value);
|
||||||
|
|
||||||
// get device constant registers
|
// get device constant registers
|
||||||
int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value);
|
int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value);
|
||||||
|
|
||||||
////////////////////////////// UTILITY FUNCIONS ///////////////////////////////
|
////////////////////////////// UTILITY FUNCIONS ///////////////////////////////
|
||||||
|
|
||||||
@@ -72,7 +72,7 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
|
|||||||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||||
|
|
||||||
// get performance counters
|
// get performance counters
|
||||||
int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs);
|
int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,9 @@ CXXFLAGS +=-fstack-protector
|
|||||||
# Position independent code
|
# Position independent code
|
||||||
CXXFLAGS += -fPIC
|
CXXFLAGS += -fPIC
|
||||||
|
|
||||||
|
# Dump perf stats
|
||||||
|
CXXFLAGS += -DDUMP_PERF_STATS
|
||||||
|
|
||||||
# Enable scope analyzer
|
# Enable scope analyzer
|
||||||
#CXXFLAGS += -DSCOPE
|
#CXXFLAGS += -DSCOPE
|
||||||
|
|
||||||
|
|||||||
@@ -211,14 +211,29 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||||||
vx_scope_stop(device->fpga, 0);
|
vx_scope_stop(device->fpga, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
{
|
#ifdef DUMP_PERF_STATS
|
||||||
// Dump perf stats
|
// Dump perf stats
|
||||||
|
if (device->num_cores > 1) {
|
||||||
|
uint64_t total_instrs = 0, total_cycles = 0;
|
||||||
|
for (unsigned core_id = 0; core_id < device->num_cores; ++core_id) {
|
||||||
|
uint64_t instrs, cycles;
|
||||||
|
int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles);
|
||||||
|
assert(ret == 0);
|
||||||
|
float IPC = (float)(double(instrs) / double(cycles));
|
||||||
|
fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
||||||
|
total_instrs += instrs;
|
||||||
|
total_cycles = std::max<uint64_t>(total_cycles, cycles);
|
||||||
|
}
|
||||||
|
float IPC = (float)(double(total_instrs) / double(total_cycles));
|
||||||
|
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
||||||
|
} else {
|
||||||
uint64_t instrs, cycles;
|
uint64_t instrs, cycles;
|
||||||
int ret = vx_get_perf(hdevice, &instrs, &cycles);
|
int ret = vx_get_perf(hdevice, 0, &instrs, &cycles);
|
||||||
float IPC = (float)(double(instrs) / double(cycles));
|
float IPC = (float)(double(instrs) / double(cycles));
|
||||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
|
||||||
assert(ret == 0);
|
assert(ret == 0);
|
||||||
|
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
fpgaClose(device->fpga);
|
fpgaClose(device->fpga);
|
||||||
|
|
||||||
@@ -480,7 +495,7 @@ extern int vx_start(vx_device_h hdevice) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// set device constant registers
|
// set device constant registers
|
||||||
extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value) {
|
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||||
if (nullptr == hdevice)
|
if (nullptr == hdevice)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -491,8 +506,8 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value
|
|||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
// write CSR value
|
// write CSR value
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value));
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE));
|
||||||
|
|
||||||
@@ -500,7 +515,7 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value
|
|||||||
}
|
}
|
||||||
|
|
||||||
// get device constant registers
|
// get device constant registers
|
||||||
extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value) {
|
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
|
||||||
if (nullptr == hdevice || nullptr == value)
|
if (nullptr == hdevice || nullptr == value)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -512,8 +527,8 @@ extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* valu
|
|||||||
|
|
||||||
|
|
||||||
// write CSR value
|
// write CSR value
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ));
|
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ));
|
||||||
|
|
||||||
// Ensure ready for new command
|
// Ensure ready for new command
|
||||||
|
|||||||
@@ -28,6 +28,8 @@ CFLAGS += -fPIC
|
|||||||
|
|
||||||
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
|
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
|
||||||
|
|
||||||
|
CFLAGS += -DDUMP_PERF_STATS
|
||||||
|
|
||||||
LDFLAGS += -shared -pthread
|
LDFLAGS += -shared -pthread
|
||||||
# LDFLAGS += -dynamiclib -pthread
|
# LDFLAGS += -dynamiclib -pthread
|
||||||
|
|
||||||
|
|||||||
@@ -69,7 +69,6 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
~vx_device() {
|
~vx_device() {
|
||||||
simulator_.print_stats(std::cout);
|
|
||||||
if (future_.valid()) {
|
if (future_.valid()) {
|
||||||
future_.wait();
|
future_.wait();
|
||||||
}
|
}
|
||||||
@@ -152,6 +151,28 @@ public:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int set_csr(int core_id, int addr, unsigned value) {
|
||||||
|
if (future_.valid()) {
|
||||||
|
future_.wait(); // ensure prior run completed
|
||||||
|
}
|
||||||
|
simulator_.set_csr(core_id, addr, value);
|
||||||
|
while (simulator_.is_busy()) {
|
||||||
|
simulator_.step();
|
||||||
|
};
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_csr(int core_id, int addr, unsigned *value) {
|
||||||
|
if (future_.valid()) {
|
||||||
|
future_.wait(); // ensure prior run completed
|
||||||
|
}
|
||||||
|
simulator_.get_csr(core_id, addr, value);
|
||||||
|
while (simulator_.is_busy()) {
|
||||||
|
simulator_.step();
|
||||||
|
};
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
size_t mem_allocation_;
|
size_t mem_allocation_;
|
||||||
@@ -215,6 +236,29 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||||||
|
|
||||||
vx_device *device = ((vx_device*)hdevice);
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|
||||||
|
#ifdef DUMP_PERF_STATS
|
||||||
|
unsigned num_cores;
|
||||||
|
vx_csr_get(hdevice, 0, CSR_NC, &num_cores);
|
||||||
|
if (num_cores > 1) {
|
||||||
|
uint64_t total_instrs = 0, total_cycles = 0;
|
||||||
|
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
|
||||||
|
uint64_t instrs, cycles;
|
||||||
|
vx_get_perf(hdevice, core_id, &instrs, &cycles);
|
||||||
|
float IPC = (float)(double(instrs) / double(cycles));
|
||||||
|
fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
||||||
|
total_instrs += instrs;
|
||||||
|
total_cycles = std::max<uint64_t>(total_cycles, cycles);
|
||||||
|
}
|
||||||
|
float IPC = (float)(double(total_instrs) / double(total_cycles));
|
||||||
|
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
||||||
|
} else {
|
||||||
|
uint64_t instrs, cycles;
|
||||||
|
vx_get_perf(hdevice, 0, &instrs, &cycles);
|
||||||
|
float IPC = (float)(double(instrs) / double(cycles));
|
||||||
|
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
delete device;
|
delete device;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -324,10 +368,20 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||||||
return device->wait(timeout);
|
return device->wait(timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
|
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||||
return -1;
|
if (nullptr == hdevice)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|
||||||
|
return device->set_csr(core_id, addr, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
|
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
|
||||||
return -1;
|
if (nullptr == hdevice)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
vx_device *device = ((vx_device*)hdevice);
|
||||||
|
|
||||||
|
return device->get_csr(core_id, addr, value);
|
||||||
}
|
}
|
||||||
@@ -358,10 +358,10 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||||||
return device->wait(timeout);
|
return device->wait(timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
|
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
|
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@@ -48,10 +48,10 @@ extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
|
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
|
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@@ -22,6 +22,7 @@ Simulator::Simulator() {
|
|||||||
|
|
||||||
dram_rsp_active_ = false;
|
dram_rsp_active_ = false;
|
||||||
snp_req_active_ = false;
|
snp_req_active_ = false;
|
||||||
|
csr_req_active_ = false;
|
||||||
|
|
||||||
#ifdef VCD_OUTPUT
|
#ifdef VCD_OUTPUT
|
||||||
Verilated::traceEverOn(true);
|
Verilated::traceEverOn(true);
|
||||||
@@ -163,15 +164,6 @@ void Simulator::eval_io_bus() {
|
|||||||
vortex_->io_rsp_valid = 0;
|
vortex_->io_rsp_valid = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Simulator::eval_csr_bus() {
|
|
||||||
vortex_->csr_io_req_valid = 0;
|
|
||||||
vortex_->csr_io_req_coreid = 0;
|
|
||||||
vortex_->csr_io_req_addr = 0;
|
|
||||||
vortex_->csr_io_req_rw = 0;
|
|
||||||
vortex_->csr_io_req_data = 0;
|
|
||||||
vortex_->csr_io_rsp_ready = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Simulator::eval_snp_bus() {
|
void Simulator::eval_snp_bus() {
|
||||||
if (snp_req_active_) {
|
if (snp_req_active_) {
|
||||||
if (vortex_->snp_rsp_valid) {
|
if (vortex_->snp_rsp_valid) {
|
||||||
@@ -204,6 +196,27 @@ void Simulator::eval_snp_bus() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Simulator::eval_csr_bus() {
|
||||||
|
if (csr_req_active_) {
|
||||||
|
if (vortex_->csr_io_req_rw) {
|
||||||
|
if (vortex_->csr_io_req_ready) {
|
||||||
|
vortex_->snp_req_valid = 0;
|
||||||
|
csr_req_active_ = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (vortex_->csr_io_rsp_valid) {
|
||||||
|
*csr_rsp_value_ = vortex_->csr_io_rsp_data;
|
||||||
|
vortex_->snp_req_valid = 0;
|
||||||
|
vortex_->csr_io_rsp_ready = 0;
|
||||||
|
csr_req_active_ = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
vortex_->csr_io_req_valid = 0;
|
||||||
|
vortex_->csr_io_rsp_ready = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Simulator::wait(uint32_t cycles) {
|
void Simulator::wait(uint32_t cycles) {
|
||||||
for (int i = 0; i < cycles; ++i) {
|
for (int i = 0; i < cycles; ++i) {
|
||||||
this->step();
|
this->step();
|
||||||
@@ -211,7 +224,9 @@ void Simulator::wait(uint32_t cycles) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool Simulator::is_busy() const {
|
bool Simulator::is_busy() const {
|
||||||
return vortex_->busy || snp_req_active_;
|
return vortex_->busy
|
||||||
|
|| snp_req_active_
|
||||||
|
|| csr_req_active_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||||
@@ -221,22 +236,52 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
|||||||
if (0 == size)
|
if (0 == size)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
snp_req_active_ = true;
|
|
||||||
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
|
|
||||||
|
|
||||||
vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE;
|
vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE;
|
||||||
vortex_->snp_req_tag = 0;
|
vortex_->snp_req_tag = 0;
|
||||||
vortex_->snp_req_valid = 1;
|
vortex_->snp_req_valid = 1;
|
||||||
vortex_->snp_rsp_ready = 1;
|
vortex_->snp_rsp_ready = 1;
|
||||||
|
|
||||||
|
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
|
||||||
--snp_req_size_;
|
--snp_req_size_;
|
||||||
pending_snp_reqs_ = 1;
|
pending_snp_reqs_ = 1;
|
||||||
|
|
||||||
|
snp_req_active_ = true;
|
||||||
|
|
||||||
#ifdef DBG_PRINT_CACHE_SNP
|
#ifdef DBG_PRINT_CACHE_SNP
|
||||||
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
|
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Simulator::set_csr(int core_id, int addr, unsigned value) {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
std::cout << timestamp << ": [sim] set_csr()" << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
vortex_->csr_io_req_valid = 1;
|
||||||
|
vortex_->csr_io_req_coreid = core_id;
|
||||||
|
vortex_->csr_io_req_addr = addr;
|
||||||
|
vortex_->csr_io_req_rw = 1;
|
||||||
|
vortex_->csr_io_req_data = value;
|
||||||
|
vortex_->csr_io_rsp_ready = 0;
|
||||||
|
|
||||||
|
csr_req_active_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Simulator::get_csr(int core_id, int addr, unsigned *value) {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
std::cout << timestamp << ": [sim] get_csr()" << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
vortex_->csr_io_req_valid = 1;
|
||||||
|
vortex_->csr_io_req_coreid = core_id;
|
||||||
|
vortex_->csr_io_req_addr = addr;
|
||||||
|
vortex_->csr_io_req_rw = 0;
|
||||||
|
vortex_->csr_io_rsp_ready = 1;
|
||||||
|
|
||||||
|
csr_rsp_value_ = value;
|
||||||
|
csr_req_active_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
void Simulator::run() {
|
void Simulator::run() {
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
std::cout << timestamp << ": [sim] run()" << std::endl;
|
std::cout << timestamp << ": [sim] run()" << std::endl;
|
||||||
|
|||||||
@@ -31,6 +31,8 @@ public:
|
|||||||
Simulator();
|
Simulator();
|
||||||
virtual ~Simulator();
|
virtual ~Simulator();
|
||||||
|
|
||||||
|
void attach_ram(RAM* ram);
|
||||||
|
|
||||||
void load_bin(const char* program_file);
|
void load_bin(const char* program_file);
|
||||||
void load_ihex(const char* program_file);
|
void load_ihex(const char* program_file);
|
||||||
|
|
||||||
@@ -39,12 +41,14 @@ public:
|
|||||||
void reset();
|
void reset();
|
||||||
void step();
|
void step();
|
||||||
void wait(uint32_t cycles);
|
void wait(uint32_t cycles);
|
||||||
void flush_caches(uint32_t mem_addr, uint32_t size);
|
|
||||||
|
|
||||||
void attach_ram(RAM* ram);
|
void flush_caches(uint32_t mem_addr, uint32_t size);
|
||||||
|
void set_csr(int core_id, int addr, unsigned value);
|
||||||
|
void get_csr(int core_id, int addr, unsigned *value);
|
||||||
|
|
||||||
void run();
|
void run();
|
||||||
int get_last_wb_value(int reg) const;
|
int get_last_wb_value(int reg) const;
|
||||||
|
|
||||||
void print_stats(std::ostream& out);
|
void print_stats(std::ostream& out);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -60,8 +64,11 @@ private:
|
|||||||
int dram_rsp_active_;
|
int dram_rsp_active_;
|
||||||
|
|
||||||
bool snp_req_active_;
|
bool snp_req_active_;
|
||||||
|
bool csr_req_active_;
|
||||||
|
|
||||||
uint32_t snp_req_size_;
|
uint32_t snp_req_size_;
|
||||||
uint32_t pending_snp_reqs_;
|
uint32_t pending_snp_reqs_;
|
||||||
|
uint32_t* csr_rsp_value_;
|
||||||
|
|
||||||
RAM *ram_;
|
RAM *ram_;
|
||||||
VVortex *vortex_;
|
VVortex *vortex_;
|
||||||
|
|||||||
Reference in New Issue
Block a user