code refactoring: DRAM => MEM renaming

This commit is contained in:
Blaise Tine
2021-04-26 00:58:48 -07:00
parent d808aa2735
commit 8410c49f53
38 changed files with 1161 additions and 1161 deletions

View File

@@ -115,10 +115,10 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
uint64_t smem_writes = 0;
uint64_t smem_bank_stalls = 0;
// PERF: memory
uint64_t dram_reads = 0;
uint64_t dram_writes = 0;
uint64_t dram_stalls = 0;
uint64_t dram_lat = 0;
uint64_t mem_reads = 0;
uint64_t mem_writes = 0;
uint64_t mem_stalls = 0;
uint64_t mem_lat = 0;
#endif
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
@@ -255,21 +255,21 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem bank stalls=%ld (utilization=%d%%)\n", core_id, smem_bank_st_per_core, smem_bank_utilization);
smem_bank_stalls += smem_bank_st_per_core;
// PERF: DRAM
uint64_t dram_reads_per_core, dram_writes_per_core, dram_stalls_per_core, dram_lat_per_core;
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_READS, CSR_MPM_DRAM_READS_H, &dram_reads_per_core);
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_WRITES, CSR_MPM_DRAM_WRITES_H, &dram_writes_per_core);
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_ST, CSR_MPM_DRAM_ST_H, &dram_stalls_per_core);
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DRAM_LAT, CSR_MPM_DRAM_LAT_H, &dram_lat_per_core);
int dram_utilization = (int)((double(dram_reads_per_core + dram_writes_per_core) / double(dram_reads_per_core + dram_writes_per_core + dram_stalls_per_core)) * 100);
int dram_avg_lat = (int)(double(dram_lat_per_core) / double(dram_reads_per_core));
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram requests=%ld (reads=%ld, writes=%ld)\n", core_id, (dram_reads_per_core + dram_writes_per_core), dram_reads_per_core, dram_writes_per_core);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram stalls=%ld (utilization=%d%%)\n", core_id, dram_stalls_per_core, dram_utilization);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dram average latency=%d cycles\n", core_id, dram_avg_lat);
dram_reads += dram_reads_per_core;
dram_writes += dram_writes_per_core;
dram_stalls += dram_stalls_per_core;
dram_lat += dram_lat_per_core;
// PERF: memory
uint64_t mem_reads_per_core, mem_writes_per_core, mem_stalls_per_core, mem_lat_per_core;
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_READS, CSR_MPM_MEM_READS_H, &mem_reads_per_core);
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_WRITES, CSR_MPM_MEM_WRITES_H, &mem_writes_per_core);
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_ST, CSR_MPM_MEM_ST_H, &mem_stalls_per_core);
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_LAT, CSR_MPM_MEM_LAT_H, &mem_lat_per_core);
int mem_utilization = (int)((double(mem_reads_per_core + mem_writes_per_core) / double(mem_reads_per_core + mem_writes_per_core + mem_stalls_per_core)) * 100);
int mem_avg_lat = (int)(double(mem_lat_per_core) / double(mem_reads_per_core));
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory requests=%ld (reads=%ld, writes=%ld)\n", core_id, (mem_reads_per_core + mem_writes_per_core), mem_reads_per_core, mem_writes_per_core);
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory stalls=%ld (utilization=%d%%)\n", core_id, mem_stalls_per_core, mem_utilization);
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory average latency=%d cycles\n", core_id, mem_avg_lat);
mem_reads += mem_reads_per_core;
mem_writes += mem_writes_per_core;
mem_stalls += mem_stalls_per_core;
mem_lat += mem_lat_per_core;
#endif
}
@@ -282,8 +282,8 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_write_misses) / double(dcache_writes))) * 100);
int dcache_bank_utilization = (int)((double(dcache_reads + dcache_writes) / double(dcache_reads + dcache_writes + dcache_bank_stalls)) * 100);
int smem_bank_utilization = (int)((double(smem_reads + smem_writes) / double(smem_reads + smem_writes + smem_bank_stalls)) * 100);
int dram_utilization = (int)((double(dram_reads + dram_writes) / double(dram_reads + dram_writes + dram_stalls)) * 100);
int dram_avg_lat = (int)(double(dram_lat) / double(dram_reads));
int mem_utilization = (int)((double(mem_reads + mem_writes) / double(mem_reads + mem_writes + mem_stalls)) * 100);
int mem_avg_lat = (int)(double(mem_lat) / double(mem_reads));
fprintf(stream, "PERF: ibuffer stalls=%ld\n", ibuffer_stalls);
fprintf(stream, "PERF: scoreboard stalls=%ld\n", scoreboard_stalls);
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
@@ -306,9 +306,9 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
fprintf(stream, "PERF: smem reads=%ld\n", smem_reads);
fprintf(stream, "PERF: smem writes=%ld\n", smem_writes);
fprintf(stream, "PERF: smem bank stalls=%ld (utilization=%d%%)\n", smem_bank_stalls, smem_bank_utilization);
fprintf(stream, "PERF: dram requests=%ld (reads=%ld, writes=%ld)\n", (dram_reads + dram_writes), dram_reads, dram_writes);
fprintf(stream, "PERF: dram stalls=%ld (utilization=%d%%)\n", dram_stalls, dram_utilization);
fprintf(stream, "PERF: dram average latency=%d cycles\n", dram_avg_lat);
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
fprintf(stream, "PERF: memory stalls=%ld (utilization=%d%%)\n", mem_stalls, mem_utilization);
fprintf(stream, "PERF: memory average latency=%d cycles\n", mem_avg_lat);
#endif
return ret;

View File

@@ -13,7 +13,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE

View File

@@ -10,10 +10,10 @@
#define RESET_DELAY 4
#define ENABLE_DRAM_STALLS
#define DRAM_LATENCY 24
#define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16
#define ENABLE_MEM_STALLS
#define MEM_LATENCY 24
#define MEM_RQ_SIZE 16
#define MEM_STALLS_MODULO 16
uint64_t timestamp = 0;
@@ -138,7 +138,7 @@ void opae_sim::flush() {
void opae_sim::reset() {
host_buffers_.clear();
dram_reads_.clear();
mem_reads_.clear();
cci_reads_.clear();
cci_writes_.clear();
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
@@ -268,87 +268,87 @@ void opae_sim::sTxPort_bus() {
}
void opae_sim::avs_bus() {
// update DRAM responses schedule
for (auto& rsp : dram_reads_) {
// update memory responses schedule
for (auto& rsp : mem_reads_) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
// schedule DRAM responses in FIFO order
std::list<dram_rd_req_t>::iterator dram_rd_it(dram_reads_.end());
if (!dram_reads_.empty()
&& (0 == dram_reads_.begin()->cycles_left)) {
dram_rd_it = dram_reads_.begin();
// schedule memory responses in FIFO order
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_.end());
if (!mem_reads_.empty()
&& (0 == mem_reads_.begin()->cycles_left)) {
mem_rd_it = mem_reads_.begin();
}
// send DRAM response
// send memory response
vortex_afu_->avs_readdatavalid = 0;
if (dram_rd_it != dram_reads_.end()) {
if (mem_rd_it != mem_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, dram_rd_it->data.data(), DRAM_BLOCK_SIZE);
uint32_t addr = dram_rd_it->addr;
dram_reads_.erase(dram_rd_it);
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * DRAM_BLOCK_SIZE);
for (auto& req : dram_reads_) {
memcpy(vortex_afu_->avs_readdata, mem_rd_it->data.data(), MEM_BLOCK_SIZE);
uint32_t addr = mem_rd_it->addr;
mem_reads_.erase(mem_rd_it);
/*printf("%0ld: [sim] MEM Rd Rsp: addr=%x, pending={", timestamp, addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * DRAM_BLOCK_SIZE);
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * DRAM_BLOCK_SIZE);
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
}
printf("}\n");*/
}
// handle DRAM stalls
bool dram_stalled = false;
#ifdef ENABLE_DRAM_STALLS
if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) {
dram_stalled = true;
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (dram_reads_.size() >= DRAM_RQ_SIZE) {
dram_stalled = true;
if (mem_reads_.size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process DRAM requests
if (!dram_stalled) {
// process memory requests
if (!mem_stalled) {
assert(!vortex_afu_->avs_read || !vortex_afu_->avs_write);
if (vortex_afu_->avs_write) {
uint64_t byteen = vortex_afu_->avs_byteenable;
unsigned base_addr = vortex_afu_->avs_address * DRAM_BLOCK_SIZE;
unsigned base_addr = vortex_afu_->avs_address * MEM_BLOCK_SIZE;
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata);
for (int i = 0; i < DRAM_BLOCK_SIZE; i++) {
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
ram_[base_addr + i] = data[i];
}
}
/*printf("%0ld: [sim] DRAM Wr Req: addr=%x, data=", timestamp, base_addr);
for (int i = 0; i < DRAM_BLOCK_SIZE; i++) {
printf("%0x", data[(DRAM_BLOCK_SIZE-1)-i]);
/*printf("%0ld: [sim] MEM Wr Req: addr=%x, data=", timestamp, base_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");*/
}
if (vortex_afu_->avs_read) {
dram_rd_req_t dram_req;
dram_req.addr = vortex_afu_->avs_address;
ram_.read(vortex_afu_->avs_address * DRAM_BLOCK_SIZE, DRAM_BLOCK_SIZE, dram_req.data.data());
dram_req.cycles_left = DRAM_LATENCY;
for (auto& rsp : dram_reads_) {
if (dram_req.addr == rsp.addr) {
dram_req.cycles_left = rsp.cycles_left;
mem_rd_req_t mem_req;
mem_req.addr = vortex_afu_->avs_address;
ram_.read(vortex_afu_->avs_address * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_reads_) {
if (mem_req.addr == rsp.addr) {
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
dram_reads_.emplace_back(dram_req);
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * DRAM_BLOCK_SIZE);
for (auto& req : dram_reads_) {
mem_reads_.emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: addr=%x, pending={", timestamp, mem_req.addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * DRAM_BLOCK_SIZE);
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * DRAM_BLOCK_SIZE);
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
}
printf("}\n");*/
}
}
vortex_afu_->avs_waitrequest = dram_stalled;
vortex_afu_->avs_waitrequest = mem_stalled;
}

View File

@@ -17,7 +17,7 @@
#include <list>
#include <unordered_map>
#define DRAM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
#define CACHE_BLOCK_SIZE 64
@@ -43,9 +43,9 @@ private:
typedef struct {
int cycles_left;
std::array<uint8_t, DRAM_BLOCK_SIZE> data;
std::array<uint8_t, MEM_BLOCK_SIZE> data;
uint32_t addr;
} dram_rd_req_t;
} mem_rd_req_t;
typedef struct {
int cycles_left;
@@ -80,7 +80,7 @@ private:
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
std::list<dram_rd_req_t> dram_reads_;
std::list<mem_rd_req_t> mem_reads_;
std::list<cci_rd_req_t> cci_reads_;

View File

@@ -12,7 +12,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE