adding CSR support to rtlsim driver
This commit is contained in:
@@ -91,22 +91,22 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
|
||||
return err;
|
||||
}
|
||||
|
||||
extern int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs) {
|
||||
extern int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs) {
|
||||
int ret = 0;
|
||||
|
||||
unsigned value;
|
||||
|
||||
if (cycles) {
|
||||
ret |= vx_csr_get(device, 0, CSR_CYCLE_H, &value);
|
||||
ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value);
|
||||
*cycles = value;
|
||||
ret |= vx_csr_get(device, 0, CSR_CYCLE, &value);
|
||||
ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value);
|
||||
*cycles = (*cycles << 32) | value;
|
||||
}
|
||||
|
||||
if (instrs) {
|
||||
ret |= vx_csr_get(device, 0, CSR_INSTRET_H, &value);
|
||||
ret |= vx_csr_get(device, core_id, CSR_INSTRET_H, &value);
|
||||
*instrs = value;
|
||||
ret |= vx_csr_get(device, 0, CSR_INSTRET, &value);
|
||||
ret |= vx_csr_get(device, core_id, CSR_INSTRET, &value);
|
||||
*instrs = (*instrs << 32) | value;
|
||||
}
|
||||
|
||||
|
||||
@@ -58,10 +58,10 @@ int vx_start(vx_device_h hdevice);
|
||||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||
|
||||
// set device constant registers
|
||||
int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value);
|
||||
int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value);
|
||||
|
||||
// get device constant registers
|
||||
int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value);
|
||||
int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value);
|
||||
|
||||
////////////////////////////// UTILITY FUNCTIONS ///////////////////////////////
|
||||
|
||||
@@ -72,7 +72,7 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
|
||||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||
|
||||
// get performance counters
|
||||
int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs);
|
||||
int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -17,6 +17,9 @@ CXXFLAGS +=-fstack-protector
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
# Enable scope analyzer
|
||||
#CXXFLAGS += -DSCOPE
|
||||
|
||||
|
||||
@@ -211,14 +211,29 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
||||
vx_scope_stop(device->fpga, 0);
|
||||
#endif
|
||||
|
||||
{
|
||||
// Dump perf stats
|
||||
#ifdef DUMP_PERF_STATS
|
||||
// Dump perf stats
|
||||
if (device->num_cores > 1) {
|
||||
uint64_t total_instrs = 0, total_cycles = 0;
|
||||
for (unsigned core_id = 0; core_id < device->num_cores; ++core_id) {
|
||||
uint64_t instrs, cycles;
|
||||
int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles);
|
||||
assert(ret == 0);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
||||
total_instrs += instrs;
|
||||
total_cycles = std::max<uint64_t>(total_cycles, cycles);
|
||||
}
|
||||
float IPC = (float)(double(total_instrs) / double(total_cycles));
|
||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
||||
} else {
|
||||
uint64_t instrs, cycles;
|
||||
int ret = vx_get_perf(hdevice, &instrs, &cycles);
|
||||
int ret = vx_get_perf(hdevice, 0, &instrs, &cycles);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
assert(ret == 0);
|
||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
}
|
||||
#endif
|
||||
|
||||
fpgaClose(device->fpga);
|
||||
|
||||
@@ -480,7 +495,7 @@ extern int vx_start(vx_device_h hdevice) {
|
||||
}
|
||||
|
||||
// set device constant registers
|
||||
extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value) {
|
||||
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
@@ -491,8 +506,8 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value
|
||||
return -1;
|
||||
|
||||
// write CSR value
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE));
|
||||
|
||||
@@ -500,7 +515,7 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value
|
||||
}
|
||||
|
||||
// get device constant registers
|
||||
extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value) {
|
||||
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
|
||||
if (nullptr == hdevice || nullptr == value)
|
||||
return -1;
|
||||
|
||||
@@ -512,8 +527,8 @@ extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* valu
|
||||
|
||||
|
||||
// write CSR value
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ));
|
||||
|
||||
// Ensure ready for new command
|
||||
|
||||
@@ -28,6 +28,8 @@ CFLAGS += -fPIC
|
||||
|
||||
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
|
||||
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
# LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
|
||||
@@ -69,7 +69,28 @@ public:
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
simulator_.print_stats(std::cout);
|
||||
#ifdef DUMP_PERF_STATS
    // Dump per-core and aggregate performance counters.
    // vx_get_perf() is declared with its outputs in (cycles, instrs) order,
    // so the addresses are passed in that order; otherwise the values printed
    // under "instrs"/"cycles" and the derived IPC would be swapped.
    unsigned num_cores;
    this->get_csr(0, CSR_NC, &num_cores);
    if (num_cores > 1) {
        uint64_t total_instrs = 0, total_cycles = 0;
        for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
            uint64_t instrs, cycles;
            vx_get_perf(this, core_id, &cycles, &instrs);
            float IPC = (float)(double(instrs) / double(cycles));
            fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
            // Instructions are summed across cores; the aggregate cycle count
            // is the largest per-core value.
            total_instrs += instrs;
            total_cycles = std::max<uint64_t>(total_cycles, cycles);
        }
        float IPC = (float)(double(total_instrs) / double(total_cycles));
        fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
    } else {
        uint64_t instrs, cycles;
        vx_get_perf(this, 0, &cycles, &instrs);
        float IPC = (float)(double(instrs) / double(cycles));
        fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
    }
#endif
|
||||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
@@ -152,6 +173,28 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
int set_csr(int core_id, int addr, unsigned value) {
|
||||
if (future_.valid()) {
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
simulator_.set_csr(core_id, addr, value);
|
||||
while (simulator_.is_busy()) {
|
||||
simulator_.step();
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_csr(int core_id, int addr, unsigned *value) {
|
||||
if (future_.valid()) {
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
simulator_.get_csr(core_id, addr, value);
|
||||
while (simulator_.is_busy()) {
|
||||
simulator_.step();
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
size_t mem_allocation_;
|
||||
@@ -324,10 +367,20 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
return device->wait(timeout);
|
||||
}
|
||||
|
||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
|
||||
return -1;
|
||||
// Write a CSR on the given core via the vx_device behind the handle.
// Returns -1 for a null handle, otherwise the result of vx_device::set_csr.
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
    if (hdevice != nullptr) {
        return ((vx_device*)hdevice)->set_csr(core_id, addr, value);
    }
    return -1;
}
|
||||
|
||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
|
||||
return -1;
|
||||
// Read a CSR of the given core via the vx_device behind the handle.
// Returns -1 when either the device handle or the output pointer is null
// (the same guard the other vx_csr_get implementation applies); otherwise
// returns the result of vx_device::get_csr.
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
    if (nullptr == hdevice || nullptr == value)
        return -1;

    vx_device *device = ((vx_device*)hdevice);

    return device->get_csr(core_id, addr, value);
}
|
||||
@@ -358,10 +358,10 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
return device->wait(timeout);
|
||||
}
|
||||
|
||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
|
||||
// Stub: every argument is ignored and -1 is always returned.
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
    (void)hdevice;
    (void)core_id;
    (void)addr;
    (void)value;
    return -1;
}
|
||||
|
||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
|
||||
// Stub: every argument is ignored, `value` is never written, and -1 is
// always returned.
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
    (void)hdevice;
    (void)core_id;
    (void)addr;
    (void)value;
    return -1;
}
|
||||
@@ -48,10 +48,10 @@ extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
|
||||
// Stub: every argument is ignored and -1 is always returned.
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
    (void)hdevice;
    (void)core_id;
    (void)addr;
    (void)value;
    return -1;
}
|
||||
|
||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
|
||||
// Stub: every argument is ignored, `value` is never written, and -1 is
// always returned.
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
    (void)hdevice;
    (void)core_id;
    (void)addr;
    (void)value;
    return -1;
}
|
||||
Reference in New Issue
Block a user