This commit is contained in:
Blaise Tine
2020-09-04 07:52:10 -07:00
17 changed files with 229 additions and 110 deletions

View File

@@ -91,22 +91,22 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
return err;
}
extern int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs) {
extern int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs) {
int ret = 0;
unsigned value;
if (cycles) {
ret |= vx_csr_get(device, 0, CSR_CYCLE_H, &value);
ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value);
*cycles = value;
ret |= vx_csr_get(device, 0, CSR_CYCLE, &value);
ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value);
*cycles = (*cycles << 32) | value;
}
if (instrs) {
ret |= vx_csr_get(device, 0, CSR_INSTRET_H, &value);
ret |= vx_csr_get(device, core_id, CSR_INSTRET_H, &value);
*instrs = value;
ret |= vx_csr_get(device, 0, CSR_INSTRET, &value);
ret |= vx_csr_get(device, core_id, CSR_INSTRET, &value);
*instrs = (*instrs << 32) | value;
}

View File

@@ -58,10 +58,10 @@ int vx_start(vx_device_h hdevice);
int vx_ready_wait(vx_device_h hdevice, long long timeout);
// set device constant registers
int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value);
int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value);
// get device constant registers
int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value);
int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value);
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
@@ -72,7 +72,7 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
int vx_upload_kernel_file(vx_device_h device, const char* filename);
// get performance counters
int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs);
int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs);
#ifdef __cplusplus
}

View File

@@ -17,6 +17,9 @@ CXXFLAGS +=-fstack-protector
# Position independent code
CXXFLAGS += -fPIC
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
# Enable scope analyzer
#CXXFLAGS += -DSCOPE

View File

@@ -211,14 +211,29 @@ extern int vx_dev_close(vx_device_h hdevice) {
vx_scope_stop(device->fpga, 0);
#endif
{
// Dump perf stats
#ifdef DUMP_PERF_STATS
// Dump perf stats.
// vx_get_perf() takes its output pointers in (cycles, instrs) order and
// declares them as size_t*, so the locals below are size_t and are passed
// in exactly that order; swapping them would print cycles-per-instruction
// under the "IPC" label.
if (device->num_cores > 1) {
  uint64_t total_instrs = 0, total_cycles = 0;
  for (unsigned core_id = 0; core_id < device->num_cores; ++core_id) {
    size_t cycles = 0, instrs = 0;
    int ret = vx_get_perf(hdevice, core_id, &cycles, &instrs);
    assert(ret == 0);
    (void)ret; // keeps NDEBUG builds free of an unused-variable warning
    // Guard the ratio so an idle core reports 0 instead of NaN/inf.
    float IPC = cycles ? (float)(double(instrs) / double(cycles)) : 0.0f;
    fprintf(stdout, "PERF: core%u: instrs=%llu, cycles=%llu, IPC=%f\n", core_id,
            (unsigned long long)instrs, (unsigned long long)cycles, IPC);
    total_instrs += instrs;
    // The aggregate uses the largest per-core cycle count, not the sum.
    total_cycles = std::max<uint64_t>(total_cycles, cycles);
  }
  float IPC = total_cycles ? (float)(double(total_instrs) / double(total_cycles)) : 0.0f;
  fprintf(stdout, "PERF: instrs=%llu, cycles=%llu, IPC=%f\n",
          (unsigned long long)total_instrs, (unsigned long long)total_cycles, IPC);
} else {
  // Single-core device: report core 0 only.
  size_t cycles = 0, instrs = 0;
  int ret = vx_get_perf(hdevice, 0, &cycles, &instrs);
  assert(ret == 0);
  (void)ret; // keeps NDEBUG builds free of an unused-variable warning
  float IPC = cycles ? (float)(double(instrs) / double(cycles)) : 0.0f;
  fprintf(stdout, "PERF: instrs=%llu, cycles=%llu, IPC=%f\n",
          (unsigned long long)instrs, (unsigned long long)cycles, IPC);
}
#endif
fpgaClose(device->fpga);
@@ -480,7 +495,7 @@ extern int vx_start(vx_device_h hdevice) {
}
// set device constant registers
extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value) {
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
if (nullptr == hdevice)
return -1;
@@ -491,8 +506,8 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value
return -1;
// write CSR value
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE));
@@ -500,7 +515,7 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value
}
// get device constant registers
extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value) {
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
if (nullptr == hdevice || nullptr == value)
return -1;
@@ -512,8 +527,8 @@ extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* valu
// write CSR value
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ));
// Ensure ready for new command

View File

@@ -28,6 +28,8 @@ CFLAGS += -fPIC
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
CFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
# LDFLAGS += -dynamiclib -pthread

View File

@@ -68,8 +68,7 @@ public:
simulator_.attach_ram(&ram_);
}
~vx_device() {
simulator_.print_stats(std::cout);
~vx_device() {
if (future_.valid()) {
future_.wait();
}
@@ -152,6 +151,28 @@ public:
return 0;
}
// Writes `value` to CSR `addr` of core `core_id` through the simulator and
// then steps the simulator until it stops reporting busy. Always returns 0.
int set_csr(int core_id, int addr, unsigned value) {
  // A previously launched run must have completed before touching CSRs.
  if (future_.valid())
    future_.wait();

  simulator_.set_csr(core_id, addr, value);

  // Keep stepping the simulation for as long as it reports busy.
  for (; simulator_.is_busy(); simulator_.step()) {
  }

  return 0;
}
// Requests CSR `addr` of core `core_id` from the simulator, handing it
// `value` as the destination, and then steps the simulator until it stops
// reporting busy. Always returns 0.
// NOTE(review): whether `*value` is filled in by the get_csr() call itself
// or only once the stepping loop finishes is not visible here — confirm.
int get_csr(int core_id, int addr, unsigned *value) {
  // A previously launched run must have completed before touching CSRs.
  if (future_.valid())
    future_.wait();

  simulator_.get_csr(core_id, addr, value);

  // Keep stepping the simulation for as long as it reports busy.
  while (simulator_.is_busy())
    simulator_.step();

  return 0;
}
private:
size_t mem_allocation_;
@@ -214,6 +235,29 @@ extern int vx_dev_close(vx_device_h hdevice) {
return -1;
vx_device *device = ((vx_device*)hdevice);
#ifdef DUMP_PERF_STATS
// Dump perf stats before the device object is destroyed.
// vx_get_perf() takes its output pointers in (cycles, instrs) order and
// declares them as size_t*, so the locals below are size_t and are passed
// in exactly that order; swapping them would print cycles-per-instruction
// under the "IPC" label.
unsigned num_cores = 0;
if (vx_csr_get(hdevice, 0, CSR_NC, &num_cores) != 0) {
  // Core count could not be read: fall back to reporting core 0 only
  // instead of looping over an indeterminate value.
  num_cores = 1;
}
if (num_cores > 1) {
  uint64_t total_instrs = 0, total_cycles = 0;
  for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
    size_t cycles = 0, instrs = 0;
    if (vx_get_perf(hdevice, core_id, &cycles, &instrs) != 0)
      continue; // counters unreadable for this core: leave it out of the report
    // Guard the ratio so an idle core reports 0 instead of NaN/inf.
    float IPC = cycles ? (float)(double(instrs) / double(cycles)) : 0.0f;
    fprintf(stdout, "PERF: core%u: instrs=%llu, cycles=%llu, IPC=%f\n", core_id,
            (unsigned long long)instrs, (unsigned long long)cycles, IPC);
    total_instrs += instrs;
    // The aggregate uses the largest per-core cycle count, not the sum.
    total_cycles = std::max<uint64_t>(total_cycles, cycles);
  }
  float IPC = total_cycles ? (float)(double(total_instrs) / double(total_cycles)) : 0.0f;
  fprintf(stdout, "PERF: instrs=%llu, cycles=%llu, IPC=%f\n",
          (unsigned long long)total_instrs, (unsigned long long)total_cycles, IPC);
} else {
  // Single-core device: report core 0 only.
  size_t cycles = 0, instrs = 0;
  if (vx_get_perf(hdevice, 0, &cycles, &instrs) == 0) {
    float IPC = cycles ? (float)(double(instrs) / double(cycles)) : 0.0f;
    fprintf(stdout, "PERF: instrs=%llu, cycles=%llu, IPC=%f\n",
            (unsigned long long)instrs, (unsigned long long)cycles, IPC);
  }
}
#endif
delete device;
@@ -324,10 +368,20 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
return device->wait(timeout);
}
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
return -1;
// Writes `value` to CSR `addr` of core `core_id` on the device behind
// `hdevice`. Returns -1 when the handle is null; otherwise returns whatever
// vx_device::set_csr() returns.
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
  if (hdevice == nullptr) {
    return -1;
  }
  return ((vx_device*)hdevice)->set_csr(core_id, addr, value);
}
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
return -1;
// Reads CSR `addr` of core `core_id` on the device behind `hdevice` into
// `*value`. Returns -1 when either the handle or the output pointer is
// null; otherwise returns whatever vx_device::get_csr() returns.
// The output pointer is rejected here so that a null is never forwarded to
// the simulator.
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
  if (nullptr == hdevice || nullptr == value)
    return -1;
  vx_device *device = ((vx_device*)hdevice);
  return device->get_csr(core_id, addr, value);
}

View File

@@ -358,10 +358,10 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
return device->wait(timeout);
}
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
// Stub: CSR writes are not implemented in this file. All parameters are
// unused and every call returns -1 (presumably the API's failure code —
// confirm against callers).
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) {
return -1;
}
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
// Stub: CSR reads are not implemented in this file. All parameters are
// unused, the output pointer is never written, and every call returns -1
// (presumably the API's failure code — confirm against callers).
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) {
return -1;
}

View File

@@ -48,10 +48,10 @@ extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
return -1;
}
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) {
// Stub: CSR writes are not implemented in this file. All parameters are
// unused and every call returns -1 (presumably the API's failure code —
// confirm against callers).
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) {
return -1;
}
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) {
// Stub: CSR reads are not implemented in this file. All parameters are
// unused, the output pointer is never written, and every call returns -1
// (presumably the API's failure code — confirm against callers).
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) {
return -1;
}