using ramulator dram simulator
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
SCRIPT_DIR = ../../hw/scripts
|
||||
@@ -7,8 +8,10 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I.. -I../../../hw -I../../common
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
|
||||
|
||||
LDFLAGS += -shared ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
|
||||
@@ -87,22 +90,15 @@ VL_FLAGS += -DIDIV_DPI
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = libopae-c-vlsim
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT).so
|
||||
all: $(PROJECT)
|
||||
|
||||
vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h
|
||||
|
||||
$(PROJECT).so: $(SRCS) vortex_afu.h
|
||||
verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT).so
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) vortex_afu.h
|
||||
verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(DESTDIR)/$(PROJECT)
|
||||
|
||||
static: $(SRCS) vortex_afu.h
|
||||
verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)'
|
||||
$(AR) rcs $(PROJECT).a obj_dir/*.o $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/*.o
|
||||
|
||||
clean-static:
|
||||
rm -rf $(PROJECT).a obj_dir vortex_afu.h
|
||||
|
||||
clean: clean-static
|
||||
rm -rf $(PROJECT).so
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT)
|
||||
|
||||
@@ -13,6 +13,31 @@
|
||||
#include <iomanip>
|
||||
#include <mem.h>
|
||||
|
||||
#define RAMULATOR
|
||||
#include <ramulator/src/Gem5Wrapper.h>
|
||||
#include <ramulator/src/Request.h>
|
||||
#include <ramulator/src/Statistics.h>
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <vortex_afu.h>
|
||||
|
||||
#include <atomic>
#include <future>
#include <list>
#include <unordered_map>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#else
|
||||
#define MEMORY_BANKS 2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef MEM_BLOCK_SIZE
|
||||
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
|
||||
#define CCI_LATENCY 8
|
||||
#define CCI_RAND_MOD 8
|
||||
#define CCI_RQ_SIZE 16
|
||||
@@ -28,18 +53,6 @@
|
||||
#define TRACE_STOP_TIME -1ull
|
||||
#endif
|
||||
|
||||
#ifndef MEM_LATENCY
|
||||
#define MEM_LATENCY 24
|
||||
#endif
|
||||
|
||||
#ifndef MEM_RQ_SIZE
|
||||
#define MEM_RQ_SIZE 16
|
||||
#endif
|
||||
|
||||
#ifndef MEM_STALLS_MODULO
|
||||
#define MEM_STALLS_MODULO 16
|
||||
#endif
|
||||
|
||||
#ifndef VERILATOR_RESET_VALUE
|
||||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
@@ -88,357 +101,417 @@ void sim_trace_enable(bool enable) {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
namespace vortex {
|
||||
class VL_OBJ {
|
||||
class opae_sim::Impl {
|
||||
public:
|
||||
#ifdef AXI_BUS
|
||||
VVortex_axi *device;
|
||||
#else
|
||||
Vvortex_afu_shim *device;
|
||||
#endif
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace;
|
||||
#endif
|
||||
|
||||
VL_OBJ() {
|
||||
Impl()
|
||||
: stop_(false)
|
||||
, host_buffer_ids_(0) {
|
||||
// force random values for unitialized signals
|
||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||
Verilated::randSeed(50);
|
||||
|
||||
// Turn off assertion before reset
|
||||
// turn off assertion before reset
|
||||
Verilated::assertOn(false);
|
||||
|
||||
#ifdef AXI_BUS
|
||||
this->device = new Vvortex_afu_shim();
|
||||
#else
|
||||
this->device = new Vvortex_afu_shim();
|
||||
#endif
|
||||
// create RTL module instance
|
||||
device_ = new Vvortex_afu_shim();
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
this->trace = new VerilatedVcdC();
|
||||
this->device->trace(this->trace, 99);
|
||||
this->trace->open("trace.vcd");
|
||||
trace_ = new VerilatedVcdC();
|
||||
device_->trace(this->trace, 99);
|
||||
trace_->open("trace.vcd");
|
||||
#endif
|
||||
|
||||
ram_ = new RAM(RAM_PAGE_SIZE);
|
||||
|
||||
// initialize dram simulator
|
||||
ramulator::Config ram_config;
|
||||
ram_config.add("standard", "DDR4");
|
||||
ram_config.add("channels", std::to_string(MEMORY_BANKS));
|
||||
ram_config.add("ranks", "1");
|
||||
ram_config.add("speed", "DDR4_2400R");
|
||||
ram_config.add("org", "DDR4_4Gb_x8");
|
||||
ram_config.add("mapping", "defaultmapping");
|
||||
ram_config.set_core_num(1);
|
||||
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
// launch execution thread
|
||||
future_ = std::async(std::launch::async, [&]{
|
||||
while (!stop_) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
this->step();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
~VL_OBJ() {
|
||||
~Impl() {
|
||||
stop_ = true;
|
||||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
for (auto& buffer : host_buffers_) {
|
||||
__aligned_free(buffer.second.data);
|
||||
}
|
||||
#ifdef VCD_OUTPUT
|
||||
this->trace->close();
|
||||
delete this->trace;
|
||||
trace_->close();
|
||||
delete trace_;
|
||||
#endif
|
||||
delete this->device;
|
||||
}
|
||||
};
|
||||
}
|
||||
delete device_;
|
||||
|
||||
delete ram_;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
opae_sim::opae_sim()
|
||||
: stop_(false)
|
||||
, host_buffer_ids_(0) {
|
||||
vl_obj_ = new VL_OBJ();
|
||||
ram_ = new RAM(RAM_PAGE_SIZE);
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
// launch execution thread
|
||||
future_ = std::async(std::launch::async, [&]{
|
||||
while (!stop_) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
this->step();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
opae_sim::~opae_sim() {
|
||||
stop_ = true;
|
||||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
for (auto& buffer : host_buffers_) {
|
||||
__aligned_free(buffer.second.data);
|
||||
}
|
||||
delete vl_obj_;
|
||||
delete ram_;
|
||||
}
|
||||
|
||||
int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
auto alloc = __aligned_malloc(CACHE_BLOCK_SIZE, len);
|
||||
if (alloc == NULL)
|
||||
return -1;
|
||||
host_buffer_t buffer;
|
||||
buffer.data = (uint64_t*)alloc;
|
||||
buffer.size = len;
|
||||
buffer.ioaddr = uintptr_t(alloc);
|
||||
auto buffer_id = host_buffer_ids_++;
|
||||
host_buffers_.emplace(buffer_id, buffer);
|
||||
*buf_addr = alloc;
|
||||
*wsid = buffer_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void opae_sim::release_buffer(uint64_t wsid) {
|
||||
auto it = host_buffers_.find(wsid);
|
||||
if (it != host_buffers_.end()) {
|
||||
__aligned_free(it->second.data);
|
||||
host_buffers_.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
|
||||
*ioaddr = host_buffers_[wsid].ioaddr;
|
||||
}
|
||||
|
||||
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid = 1;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
this->step();
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
assert(vl_obj_->device->af2cp_sTxPort_c2_mmioRdValid);
|
||||
*value = vl_obj_->device->af2cp_sTxPort_c2_data;
|
||||
}
|
||||
|
||||
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid = 1;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
memcpy(vl_obj_->device->vcp2af_sRxPort_c0_data, &value, 8);
|
||||
this->step();
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void opae_sim::reset() {
|
||||
cci_reads_.clear();
|
||||
cci_writes_.clear();
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_TxAlmFull = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_TxAlmFull = 0;
|
||||
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
mem_reads_[b].clear();
|
||||
vl_obj_->device->avs_readdatavalid[b] = 0;
|
||||
vl_obj_->device->avs_waitrequest[b] = 0;
|
||||
if (dram_) {
|
||||
dram_->finish();
|
||||
Stats::statlist.printall();
|
||||
delete dram_;
|
||||
}
|
||||
}
|
||||
|
||||
vl_obj_->device->reset = 1;
|
||||
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
auto alloc = __aligned_malloc(CACHE_BLOCK_SIZE, len);
|
||||
if (alloc == NULL)
|
||||
return -1;
|
||||
host_buffer_t buffer;
|
||||
buffer.data = (uint64_t*)alloc;
|
||||
buffer.size = len;
|
||||
buffer.ioaddr = uintptr_t(alloc);
|
||||
auto buffer_id = host_buffer_ids_++;
|
||||
host_buffers_.emplace(buffer_id, buffer);
|
||||
*buf_addr = alloc;
|
||||
*wsid = buffer_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
vl_obj_->device->clk = 0;
|
||||
void release_buffer(uint64_t wsid) {
|
||||
auto it = host_buffers_.find(wsid);
|
||||
if (it != host_buffers_.end()) {
|
||||
__aligned_free(it->second.data);
|
||||
host_buffers_.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the IO address of the buffer registered under `wsid` to *ioaddr.
// An unknown id yields 0. The lookup uses find() rather than operator[] so
// that querying an unknown id does not insert an empty entry into the table.
void get_io_address(uint64_t wsid, uint64_t *ioaddr) {
  auto it = host_buffers_.find(wsid);
  *ioaddr = (it != host_buffers_.end()) ? it->second.ioaddr : 0;
}
|
||||
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
device_->vcp2af_sRxPort_c0_mmioRdValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
this->step();
|
||||
device_->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
assert(device_->af2cp_sTxPort_c2_mmioRdValid);
|
||||
*value = device_->af2cp_sTxPort_c2_data;
|
||||
}
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
device_->vcp2af_sRxPort_c0_mmioWrValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
memcpy(device_->vcp2af_sRxPort_c0_data, &value, 8);
|
||||
this->step();
|
||||
device_->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void reset() {
|
||||
cci_reads_.clear();
|
||||
cci_writes_.clear();
|
||||
device_->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
device_->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
device_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
device_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
device_->vcp2af_sRxPort_c0_TxAlmFull = 0;
|
||||
device_->vcp2af_sRxPort_c1_TxAlmFull = 0;
|
||||
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
pending_mem_reqs_[b].clear();
|
||||
device_->avs_readdatavalid[b] = 0;
|
||||
device_->avs_waitrequest[b] = 0;
|
||||
}
|
||||
|
||||
device_->reset = 1;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
device_->clk = 0;
|
||||
this->eval();
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
device_->reset = 0;
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
void step() {
|
||||
this->sRxPort_bus();
|
||||
this->sTxPort_bus();
|
||||
this->avs_bus();
|
||||
|
||||
device_->clk = 0;
|
||||
this->eval();
|
||||
vl_obj_->device->clk = 1;
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
vl_obj_->device->reset = 0;
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
dram_->tick();
|
||||
|
||||
void opae_sim::step() {
|
||||
this->sRxPort_bus();
|
||||
this->sTxPort_bus();
|
||||
this->avs_bus();
|
||||
|
||||
vl_obj_->device->clk = 0;
|
||||
this->eval();
|
||||
vl_obj_->device->clk = 1;
|
||||
this->eval();
|
||||
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
|
||||
void opae_sim::eval() {
|
||||
vl_obj_->device->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
if (sim_trace_enabled()) {
|
||||
vl_obj_->trace->dump(timestamp);
|
||||
}
|
||||
#endif
|
||||
++timestamp;
|
||||
}
|
||||
|
||||
void opae_sim::sRxPort_bus() {
|
||||
// check mmio request
|
||||
bool mmio_req_enabled = vl_obj_->device->vcp2af_sRxPort_c0_mmioRdValid
|
||||
|| vl_obj_->device->vcp2af_sRxPort_c0_mmioWrValid;
|
||||
|
||||
// schedule CCI read responses
|
||||
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
|
||||
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_rd_it = it;
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
|
||||
// schedule CCI write responses
|
||||
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
|
||||
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_wr_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// send CCI write response
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
if (cci_wr_it != cci_writes_.end()) {
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_rspValid = 1;
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_hdr_resp_type = 0;
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
|
||||
cci_writes_.erase(cci_wr_it);
|
||||
}
|
||||
|
||||
// send CCI read response (ensure mmio disabled)
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
if (!mmio_req_enabled
|
||||
&& (cci_rd_it != cci_reads_.end())) {
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_hdr_resp_type = 0;
|
||||
memcpy(vl_obj_->device->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
||||
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
|
||||
printf("\n");*/
|
||||
cci_reads_.erase(cci_rd_it);
|
||||
}
|
||||
}
|
||||
|
||||
void opae_sim::sTxPort_bus() {
|
||||
// process read requests
|
||||
if (vl_obj_->device->af2cp_sTxPort_c0_valid) {
|
||||
assert(!vl_obj_->device->vcp2af_sRxPort_c0_TxAlmFull);
|
||||
cci_rd_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.addr = vl_obj_->device->af2cp_sTxPort_c0_hdr_address;
|
||||
cci_req.mdata = vl_obj_->device->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vl_obj_->device->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vl_obj_->device->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
cci_reads_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// process write requests
|
||||
if (vl_obj_->device->af2cp_sTxPort_c1_valid) {
|
||||
assert(!vl_obj_->device->vcp2af_sRxPort_c1_TxAlmFull);
|
||||
cci_wr_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.mdata = vl_obj_->device->af2cp_sTxPort_c1_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vl_obj_->device->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(host_ptr, vl_obj_->device->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
|
||||
cci_writes_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// check queues overflow
|
||||
vl_obj_->device->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
|
||||
vl_obj_->device->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
|
||||
}
|
||||
|
||||
void opae_sim::avs_bus() {
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
// update memory responses schedule
|
||||
for (auto& rsp : mem_reads_[b]) {
|
||||
if (rsp.cycles_left > 0)
|
||||
rsp.cycles_left -= 1;
|
||||
}
|
||||
|
||||
// schedule memory responses in FIFO order
|
||||
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_[b].end());
|
||||
if (!mem_reads_[b].empty()
|
||||
&& (0 == mem_reads_[b].begin()->cycles_left)) {
|
||||
mem_rd_it = mem_reads_[b].begin();
|
||||
}
|
||||
|
||||
// send memory response
|
||||
vl_obj_->device->avs_readdatavalid[b] = 0;
|
||||
if (mem_rd_it != mem_reads_[b].end()) {
|
||||
vl_obj_->device->avs_readdatavalid[b] = 1;
|
||||
memcpy(vl_obj_->device->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE);
|
||||
uint32_t addr = mem_rd_it->addr;
|
||||
mem_reads_[b].erase(mem_rd_it);
|
||||
/*printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%x, pending={", timestamp, b, addr * MEM_BLOCK_SIZE);
|
||||
for (auto& req : mem_reads_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
|
||||
// handle memory stalls
|
||||
bool mem_stalled = false;
|
||||
#ifdef ENABLE_MEM_STALLS
|
||||
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
|
||||
mem_stalled = true;
|
||||
} else
|
||||
if (mem_reads_[b].size() >= MEM_RQ_SIZE) {
|
||||
mem_stalled = true;
|
||||
// Evaluates the RTL model once and advances the global timestamp by one.
// When VCD_OUTPUT is defined and tracing is enabled, the state reached by this
// evaluation is dumped at the timestamp value held before the increment.
void eval() {
  device_->eval();

#ifdef VCD_OUTPUT
  const bool tracing = sim_trace_enabled();
  if (tracing) {
    trace_->dump(timestamp);
  }
#endif

  ++timestamp;
}
|
||||
|
||||
// process memory requests
|
||||
if (!mem_stalled) {
|
||||
assert(!vl_obj_->device->avs_read[b] || !vl_obj_->device->avs_write[b]);
|
||||
if (vl_obj_->device->avs_write[b]) {
|
||||
uint64_t byteen = vl_obj_->device->avs_byteenable[b];
|
||||
unsigned base_addr = vl_obj_->device->avs_address[b] * MEM_BLOCK_SIZE;
|
||||
uint8_t* data = (uint8_t*)(vl_obj_->device->avs_writedata[b]);
|
||||
void sRxPort_bus() {
|
||||
// check mmio request
|
||||
bool mmio_req_enabled = device_->vcp2af_sRxPort_c0_mmioRdValid
|
||||
|| device_->vcp2af_sRxPort_c0_mmioWrValid;
|
||||
|
||||
// schedule CCI read responses
|
||||
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
|
||||
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_rd_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// schedule CCI write responses
|
||||
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
|
||||
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_wr_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// send CCI write response
|
||||
device_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
if (cci_wr_it != cci_writes_.end()) {
|
||||
device_->vcp2af_sRxPort_c1_rspValid = 1;
|
||||
device_->vcp2af_sRxPort_c1_hdr_resp_type = 0;
|
||||
device_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
|
||||
cci_writes_.erase(cci_wr_it);
|
||||
}
|
||||
|
||||
// send CCI read response (ensure mmio disabled)
|
||||
device_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
if (!mmio_req_enabled
|
||||
&& (cci_rd_it != cci_reads_.end())) {
|
||||
device_->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
|
||||
memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||
device_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
||||
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
|
||||
printf("\n");*/
|
||||
cci_reads_.erase(cci_rd_it);
|
||||
}
|
||||
}
|
||||
|
||||
void sTxPort_bus() {
|
||||
// process read requests
|
||||
if (device_->af2cp_sTxPort_c0_valid) {
|
||||
assert(!device_->vcp2af_sRxPort_c0_TxAlmFull);
|
||||
cci_rd_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.addr = device_->af2cp_sTxPort_c0_hdr_address;
|
||||
cci_req.mdata = device_->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, device_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
cci_reads_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// process write requests
|
||||
if (device_->af2cp_sTxPort_c1_valid) {
|
||||
assert(!device_->vcp2af_sRxPort_c1_TxAlmFull);
|
||||
cci_wr_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.mdata = device_->af2cp_sTxPort_c1_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(device_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(host_ptr, device_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
|
||||
cci_writes_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// check queues overflow
|
||||
device_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
|
||||
device_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
|
||||
}
|
||||
|
||||
void avs_bus() {
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
// process memory responses
|
||||
device_->avs_readdatavalid[b] = 0;
|
||||
if (!pending_mem_reqs_[b].empty()
|
||||
&& (*pending_mem_reqs_[b].begin())->ready) {
|
||||
auto mem_rd_it = pending_mem_reqs_[b].begin();
|
||||
auto mem_req = *mem_rd_it;
|
||||
device_->avs_readdatavalid[b] = 1;
|
||||
memcpy(device_->avs_readdata[b], mem_req->data.data(), MEM_BLOCK_SIZE);
|
||||
uint32_t addr = mem_req->addr;
|
||||
pending_mem_reqs_[b].erase(mem_rd_it);
|
||||
delete mem_req;
|
||||
}
|
||||
|
||||
// process memory requests
|
||||
assert(!device_->avs_read[b] || !device_->avs_write[b]);
|
||||
unsigned byte_addr = device_->avs_address[b] * MEM_BLOCK_SIZE;
|
||||
if (device_->avs_write[b]) {
|
||||
uint64_t byteen = device_->avs_byteenable[b];
|
||||
uint8_t* data = (uint8_t*)(device_->avs_writedata[b]);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[base_addr + i] = data[i];
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, base_addr);
|
||||
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, byte_addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::WRITE,
|
||||
0
|
||||
);
|
||||
dram_->send(dram_req);
|
||||
}
|
||||
if (vl_obj_->device->avs_read[b]) {
|
||||
mem_rd_req_t mem_req;
|
||||
mem_req.addr = vl_obj_->device->avs_address[b];
|
||||
ram_->read(mem_req.data.data(), vl_obj_->device->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE);
|
||||
mem_req.cycles_left = MEM_LATENCY;
|
||||
for (auto& rsp : mem_reads_[b]) {
|
||||
if (mem_req.addr == rsp.addr) {
|
||||
// duplicate requests receive the same cycle delay
|
||||
mem_req.cycles_left = rsp.cycles_left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mem_reads_[b].emplace_back(mem_req);
|
||||
|
||||
if (device_->avs_read[b]) {
|
||||
auto mem_req = new mem_rd_req_t();
|
||||
mem_req->addr = device_->avs_address[b];
|
||||
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_[b].emplace_back(mem_req);
|
||||
|
||||
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE);
|
||||
for (auto& req : mem_reads_[b]) {
|
||||
for (auto& req : pending_mem_reqs_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
}
|
||||
|
||||
vl_obj_->device->avs_waitrequest[b] = mem_stalled;
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind([](ramulator::Request& dram_req, mem_rd_req_t* mem_req) {
|
||||
mem_req->ready = true;
|
||||
}, placeholders::_1, mem_req),
|
||||
0
|
||||
);
|
||||
dram_->send(dram_req);
|
||||
}
|
||||
|
||||
device_->avs_waitrequest[b] = false;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
bool ready;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> data;
|
||||
uint32_t addr;
|
||||
} mem_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
|
||||
uint64_t addr;
|
||||
uint32_t mdata;
|
||||
} cci_rd_req_t;
|
||||
|
||||
// One queued CCI write acknowledgement: remaining delay in cycles and the
// request's mdata tag.
struct cci_wr_req_t {
  int cycles_left;
  uint32_t mdata;
};
|
||||
|
||||
// Bookkeeping for one registered host buffer: its storage pointer, its size
// in bytes as requested by the caller, and the IO address recorded for it.
struct host_buffer_t {
  uint64_t* data;
  size_t size;
  uint64_t ioaddr;
};
|
||||
|
||||
// Handle of the execution task launched by the constructor.
std::future<void> future_;
// Shutdown flag: set by the destructor and polled by the execution thread.
// It is accessed from two threads without holding mutex_, so it is atomic.
std::atomic<bool> stop_;
|
||||
|
||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
int64_t host_buffer_ids_;
|
||||
|
||||
std::list<mem_rd_req_t*> pending_mem_reqs_[MEMORY_BANKS];
|
||||
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
|
||||
std::list<cci_wr_req_t> cci_writes_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
RAM *ram_;
|
||||
|
||||
ramulator::Gem5Wrapper* dram_;
|
||||
|
||||
Vvortex_afu_shim *device_;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace_;
|
||||
#endif
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates the simulator facade together with its heap-allocated implementation.
opae_sim::opae_sim() : impl_(new Impl()) {
}
|
||||
|
||||
// Destroys the implementation object created by the constructor.
opae_sim::~opae_sim() {
  delete impl_;
}
|
||||
|
||||
int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
return impl_->prepare_buffer(len, buf_addr, wsid, flags);
|
||||
}
|
||||
|
||||
// Forwards to Impl::release_buffer for the given workspace id.
void opae_sim::release_buffer(uint64_t wsid) {
  impl_->release_buffer(wsid);
}
|
||||
|
||||
// Forwards to Impl::get_io_address, which writes the result to *ioaddr.
void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
  impl_->get_io_address(wsid, ioaddr);
}
|
||||
|
||||
// Forwards an MMIO write to Impl::write_mmio64 with the arguments unchanged.
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
  impl_->write_mmio64(mmio_num, offset, value);
}
|
||||
|
||||
// Forwards an MMIO read to Impl::read_mmio64, which writes the result to *value.
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
  impl_->read_mmio64(mmio_num, offset, value);
}
|
||||
@@ -1,29 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <vortex_afu.h>
|
||||
|
||||
#include <ostream>
|
||||
#include <future>
|
||||
#include <list>
|
||||
#include <unordered_map>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#else
|
||||
#define MEMORY_BANKS 2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef MEM_BLOCK_SIZE
|
||||
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
|
||||
#include <stdint.h>
|
||||
namespace vortex {
|
||||
|
||||
class VL_OBJ;
|
||||
class RAM;
|
||||
|
||||
class opae_sim {
|
||||
@@ -44,57 +23,8 @@ public:
|
||||
|
||||
private:
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> data;
|
||||
uint32_t addr;
|
||||
} mem_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
|
||||
uint64_t addr;
|
||||
uint32_t mdata;
|
||||
} cci_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
uint32_t mdata;
|
||||
} cci_wr_req_t;
|
||||
|
||||
typedef struct {
|
||||
uint64_t* data;
|
||||
size_t size;
|
||||
uint64_t ioaddr;
|
||||
} host_buffer_t;
|
||||
|
||||
void reset();
|
||||
|
||||
void eval();
|
||||
|
||||
void step();
|
||||
|
||||
void sRxPort_bus();
|
||||
void sTxPort_bus();
|
||||
void avs_bus();
|
||||
|
||||
std::future<void> future_;
|
||||
bool stop_;
|
||||
|
||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
int64_t host_buffer_ids_;
|
||||
|
||||
std::list<mem_rd_req_t> mem_reads_ [MEMORY_BANKS];
|
||||
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
|
||||
std::list<cci_wr_req_t> cci_writes_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
RAM *ram_;
|
||||
|
||||
VL_OBJ* vl_obj_;
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
};
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user