minor updates
This commit is contained in:
@@ -246,7 +246,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
// bank_stalls
|
||||
uint64_t dcache_bank_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_BANK_ST, CSR_MPM_DCACHE_BANK_ST_H, &dcache_bank_st_per_core);
|
||||
int dcache_bank_utilization = (int)((1.0 - (double(dcache_reads_per_core + dcache_writes_per_core) / double(dcache_reads_per_core + dcache_writes_per_core + dcache_bank_st_per_core))) * 100);
|
||||
int dcache_bank_utilization = (int)((double(dcache_reads_per_core + dcache_writes_per_core) / double(dcache_reads_per_core + dcache_writes_per_core + dcache_bank_st_per_core)) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache bank stalls=%ld (utilization=%d%%)\n", core_id, dcache_bank_st_per_core, dcache_bank_utilization);
|
||||
dcache_bank_stalls += dcache_bank_st_per_core;
|
||||
// mshr_stalls
|
||||
@@ -279,7 +279,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
// bank_stalls
|
||||
uint64_t smem_bank_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_BANK_ST, CSR_MPM_SMEM_BANK_ST_H, &smem_bank_st_per_core);
|
||||
int smem_bank_utilization = (int)((1.0 - (double(smem_reads_per_core + smem_writes_per_core) / double(smem_reads_per_core + smem_writes_per_core + smem_bank_st_per_core))) * 100);
|
||||
int smem_bank_utilization = (int)((double(smem_reads_per_core + smem_writes_per_core) / double(smem_reads_per_core + smem_writes_per_core + smem_bank_st_per_core)) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem bank stalls=%ld (utilization=%d%%)\n", core_id, smem_bank_st_per_core, smem_bank_utilization);
|
||||
smem_bank_stalls += smem_bank_st_per_core;
|
||||
|
||||
@@ -288,12 +288,12 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_READS, CSR_MPM_DRAM_READS_H, &dram_reads_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_WRITES, CSR_MPM_DRAM_WRITES_H, &dram_writes_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_ST, CSR_MPM_DRAM_ST_H, &dram_stalls_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_LAT, CSR_MPM_DRAM_LAT_H, &dram_lat_per_core);
|
||||
int avg_dram_lat = (int)(double(dram_lat_per_core) / double(dram_reads_per_core));
|
||||
int dram_utilization = (int)((1.0 - (double(dram_reads_per_core + dram_writes_per_core) / double(dram_reads_per_core + dram_writes_per_core + dram_stalls_per_core))) * 100);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_LAT, CSR_MPM_DRAM_LAT_H, &dram_lat_per_core);
|
||||
int dram_utilization = (int)((double(dram_reads_per_core + dram_writes_per_core) / double(dram_reads_per_core + dram_writes_per_core + dram_stalls_per_core)) * 100);
|
||||
int dram_avg_lat = (int)(double(dram_lat_per_core) / double(dram_reads_per_core));
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram requests=%ld (reads=%ld, writes=%ld)\n", core_id, (dram_reads_per_core + dram_writes_per_core), dram_reads_per_core, dram_writes_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram stalls=%ld (utilization=%d%%)\n", core_id, dram_stalls_per_core, dram_utilization);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: average dram latency=%d cycles\n", core_id, avg_dram_lat);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram average latency=%d cycles\n", core_id, dram_avg_lat);
|
||||
dram_reads += dram_reads_per_core;
|
||||
dram_writes += dram_writes_per_core;
|
||||
dram_stalls += dram_stalls_per_core;
|
||||
@@ -308,10 +308,10 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
int icache_read_hit_ratio = (int)((1.0 - (double(icache_read_misses) / double(icache_reads))) * 100);
|
||||
int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_read_misses) / double(dcache_reads))) * 100);
|
||||
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_write_misses) / double(dcache_writes))) * 100);
|
||||
int dcache_bank_utilization = (int)((1.0 - (double(dcache_reads + dcache_writes) / double(dcache_reads + dcache_writes + dcache_bank_stalls))) * 100);
|
||||
int smem_bank_utilization = (int)((1.0 - (double(smem_reads + smem_writes) / double(smem_reads + smem_writes + smem_bank_stalls))) * 100);
|
||||
int dram_utilization = (int)((1.0 - (double(dram_reads + dram_writes) / double(dram_reads + dram_writes + dram_stalls))) * 100);
|
||||
int avg_dram_lat = (int)(double(dram_lat) / double(dram_reads));
|
||||
int dcache_bank_utilization = (int)((double(dcache_reads + dcache_writes) / double(dcache_reads + dcache_writes + dcache_bank_stalls)) * 100);
|
||||
int smem_bank_utilization = (int)((double(smem_reads + smem_writes) / double(smem_reads + smem_writes + smem_bank_stalls)) * 100);
|
||||
int dram_utilization = (int)((double(dram_reads + dram_writes) / double(dram_reads + dram_writes + dram_stalls)) * 100);
|
||||
int dram_avg_lat = (int)(double(dram_lat) / double(dram_reads));
|
||||
fprintf(stream, "PERF: ibuffer stalls=%ld\n", ibuffer_stalls);
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld\n", scoreboard_stalls);
|
||||
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
|
||||
@@ -337,7 +337,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
fprintf(stream, "PERF: smem bank stalls=%ld (utilization=%d%%)\n", smem_bank_stalls, smem_bank_utilization);
|
||||
fprintf(stream, "PERF: dram requests=%ld (reads=%ld, writes=%ld)\n", (dram_reads + dram_writes), dram_reads, dram_writes);
|
||||
fprintf(stream, "PERF: dram stalls=%ld (utilization=%d%%)\n", dram_stalls, dram_utilization);
|
||||
fprintf(stream, "PERF: average dram latency=%d cycles\n", avg_dram_lat);
|
||||
fprintf(stream, "PERF: dram average latency=%d cycles\n", dram_avg_lat);
|
||||
fprintf(stream, "PERF: dram bandwith=%d cycles\n", dram_avg_lat);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
|
||||
Reference in New Issue
Block a user