using ramulator dram simulator

This commit is contained in:
Blaise Tine
2021-12-06 01:22:45 -05:00
parent 59232642c4
commit b741807f8c
33 changed files with 1473 additions and 1344 deletions

View File

@@ -1,3 +1,4 @@
DESTDIR ?= .
RTL_DIR = ../../hw/rtl
DPI_DIR = ../../hw/dpi
THIRD_PARTY_DIR = ../../third_party
@@ -6,8 +7,10 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../../hw -I../../common
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
LDFLAGS += ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
@@ -31,7 +34,7 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += main.cpp simulator.cpp
SRCS += processor.cpp
ifdef AXI_BUS
TOP = Vortex_axi
@@ -86,15 +89,11 @@ PROJECT = rtlsim
all: $(PROJECT)
$(PROJECT): $(SRCS)
verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp
verilator --build $(VL_FLAGS) $^ $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$@
static: $(SRCS)
verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)'
$(AR) rcs lib$(PROJECT).a obj_dir/*.o $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/*.o
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -LDFLAGS '-shared $(LDFLAGS)' -o ../$@
clean-static:
rm -rf lib$(PROJECT).a obj_dir
clean: clean-static
rm -rf $(PROJECT)
clean:
rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so

View File

@@ -5,7 +5,8 @@
#include <unistd.h>
#include <util.h>
#include <mem.h>
#include "simulator.h"
#include <VX_config.h>
#include "processor.h"
#define RAM_PAGE_SIZE 4096
@@ -52,8 +53,8 @@ int main(int argc, char **argv) {
std::cout << "Running " << program << "..." << std::endl;
vortex::RAM ram(RAM_PAGE_SIZE);
vortex::Simulator simulator;
simulator.attach_ram(&ram);
vortex::Processor processor;
processor.attach_ram(&ram);
std::string program_ext(fileExtension(program));
if (program_ext == "bin") {
@@ -65,7 +66,7 @@ int main(int argc, char **argv) {
return -1;
}
exitcode = simulator.run();
exitcode = processor.run();
if (riscv_test) {
if (1 == exitcode) {

599
sim/rtlsim/processor.cpp Normal file
View File

@@ -0,0 +1,599 @@
#include "processor.h"
#include <verilated.h>
#ifdef AXI_BUS
#include "VVortex_axi.h"
#include "VVortex_axi__Syms.h"
#else
#include "VVortex.h"
#include "VVortex__Syms.h"
#endif
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#endif
#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <list>
#include <memory>
#include <ostream>
#include <queue>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
#include <mem.h>
#include <VX_config.h>
#define RAMULATOR
#include <ramulator/src/Gem5Wrapper.h>
#include <ramulator/src/Request.h>
#include <ramulator/src/Statistics.h>
#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
#define MEMORY_BANKS 2
#endif
#endif
#define ENABLE_MEM_STALLS
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
#ifndef VERILATOR_RESET_VALUE
#define VERILATOR_RESET_VALUE 2
#endif
#define VL_WDATA_GETW(lwp, i, n, w) \
VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w)
using namespace vortex;
static uint64_t timestamp = 0;
double sc_time_stamp() {
  // report the global tick counter as the current simulation time
  const double now = static_cast<double>(timestamp);
  return now;
}
///////////////////////////////////////////////////////////////////////////////
static bool trace_enabled = false;
static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;
bool sim_trace_enabled() {
  // tracing is active inside [trace_start_time, trace_stop_time),
  // otherwise it follows the manual trace_enabled switch
  const bool inside_window = (timestamp >= trace_start_time)
                          && (timestamp < trace_stop_time);
  return inside_window ? true : trace_enabled;
}
void sim_trace_enable(bool enable) {
trace_enabled = enable;
}
///////////////////////////////////////////////////////////////////////////////
// Private implementation of vortex::Processor. Owns the Verilated RTL model,
// the optional VCD trace writer and the ramulator DRAM timing model, and
// serves the device's memory bus (AXI when AXI_BUS is defined, otherwise the
// mem_req/mem_rsp bus) from the attached RAM.
class Processor::Impl {
public:
  // Creates the RTL and DRAM models, then runs the reset sequence.
  Impl() {
    // force random values for uninitialized signals
    Verilated::randReset(VERILATOR_RESET_VALUE);
    Verilated::randSeed(50);

    // turn off assertion before reset
    Verilated::assertOn(false);

    // create RTL module instance
#ifdef AXI_BUS
    device_ = new VVortex_axi();
#else
    device_ = new VVortex();
#endif

#ifdef VCD_OUTPUT
    Verilated::traceEverOn(true);
    trace_ = new VerilatedVcdC();
    device_->trace(trace_, 99);
    trace_->open("trace.vcd");
#endif

    ram_ = nullptr;

    // initialize dram simulator
    ramulator::Config ram_config;
    ram_config.add("standard", "DDR4");
    ram_config.add("channels", std::to_string(MEMORY_BANKS));
    ram_config.add("ranks", "1");
    ram_config.add("speed", "DDR4_2400R");
    ram_config.add("org", "DDR4_4Gb_x8");
    ram_config.add("mapping", "defaultmapping");
    ram_config.set_core_num(1);
    dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
    Stats::statlist.output("ramulator.ddr4.log");

    // reset the device
    this->reset();
  }

  // Flushes unterminated console lines, then tears down the trace, the RTL
  // model and the DRAM model (printing its statistics).
  ~Impl() {
    for (auto& buf : print_bufs_) {
      auto str = buf.second.str();
      if (!str.empty()) {
        std::cout << "#" << buf.first << ": " << str << std::endl;
      }
    }

#ifdef VCD_OUTPUT
    trace_->close();
    delete trace_;
#endif

    delete device_;

    if (dram_) {
      dram_->finish();
      Stats::statlist.printall();
      delete dram_;
    }
  }

  // the models are owned through raw pointers: copying would double-delete them
  Impl(const Impl&) = delete;
  Impl& operator=(const Impl&) = delete;

  // Sets the memory that backs bus requests; not owned. While no RAM is
  // attached the bus handlers keep the request-ready signals low.
  void attach_ram(RAM* ram) {
    ram_ = ram;
  }

  // Clears all simulator-side state and holds the device in reset for
  // RESET_DELAY clock cycles.
  void reset() {
    print_bufs_.clear();
    // Requests already handed to the DRAM model keep their bookkeeping entry
    // alive through the shared_ptr captured in their completion callback, so
    // dropping the list here neither leaks nor leaves a dangling callback.
    pending_mem_reqs_.clear();
    dram_queue_ = std::queue<ramulator::Request>();

    mem_rd_rsp_active_ = false;
    mem_rd_rsp_ready_  = false;
    mem_wr_rsp_active_ = false;
    mem_wr_rsp_ready_  = false;

#ifdef AXI_BUS
    this->reset_axi_bus();
#else
    this->reset_avs_bus();
#endif

    device_->reset = 1;

    for (int i = 0; i < RESET_DELAY; ++i) {
      device_->clk = 0;
      this->eval();
      device_->clk = 1;
      this->eval();
    }

    device_->reset = 0;

    // Turn on assertion after reset
    Verilated::assertOn(true);
  }

  // Clocks the device until it is no longer busy or raises ebreak.
  // Returns 0, or on ebreak the last value written back to register 3.
  int run() {
    int exitcode = 0;

#ifndef NDEBUG
    std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
#endif

    // execute program
    while (device_->busy) {
      if (get_ebreak()) {
        exitcode = get_last_wb_value(3);
        break;
      }
      this->step();
    }

    // wait 5 cycles to flush the pipeline
    this->wait(5);

    return exitcode;
  }

private:

  // Bookkeeping for one device request that still owes a response.
  struct mem_req_t {
    bool ready;                                 // response may be returned
    std::array<uint8_t, MEM_BLOCK_SIZE> block;  // read data captured at request time
    uint64_t addr;
    uint64_t tag;
    bool write;
  };

  // Shared between the response list and the DRAM completion callback.
  typedef std::shared_ptr<mem_req_t> mem_req_ptr;

  // Runs one full clock cycle: low phase, high phase, then the DRAM model.
  void step() {
    device_->clk = 0;
    this->eval();

#ifdef AXI_BUS
    this->eval_axi_bus(0);
#else
    this->eval_avs_bus(0);
#endif

    device_->clk = 1;
    this->eval();

#ifdef AXI_BUS
    this->eval_axi_bus(1);
#else
    this->eval_avs_bus(1);
#endif

    // hand this cycle's request (and any backlog) to the DRAM model before
    // advancing it, so an accepted request sees the same timing as a direct send
    this->issue_dram_requests();
    dram_->tick();

#ifndef NDEBUG
    fflush(stdout);
#endif
  }

  // Evaluates the RTL model once, dumps the trace if enabled and advances
  // the global timestamp.
  void eval() {
    device_->eval();
#ifdef VCD_OUTPUT
    if (sim_trace_enabled()) {
      trace_->dump(timestamp);
    }
#endif
    ++timestamp;
  }

  // Offers queued requests to the DRAM model in order. send() returns false
  // when the model cannot take the request; it then stays queued and is
  // retried next cycle instead of being silently lost.
  void issue_dram_requests() {
    while (!dram_queue_.empty() && dram_->send(dram_queue_.front())) {
      dram_queue_.pop();
    }
  }

  // True when addr falls inside the console output window.
  static bool is_console_addr(uint32_t addr) {
    return addr >= IO_COUT_ADDR
        && addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1);
  }

  // Appends every byte-enabled lane to that lane's line buffer and prints
  // the buffer, prefixed with "#<lane>: ", when a newline arrives.
  void write_console(uint64_t byteen, const uint8_t* data, int size) {
    for (int i = 0; i < size; i++) {
      if ((byteen >> i) & 0x1) {
        auto& ss_buf = print_bufs_[i];
        char c = data[i];
        ss_buf << c;
        if (c == '\n') {
          std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
          ss_buf.str("");
        }
      }
    }
  }

  // Stores the byte-enabled lanes of one memory block into RAM.
  void write_ram(uint32_t base_addr, uint64_t byteen, const uint8_t* data) {
    for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
      if ((byteen >> i) & 0x1) {
        (*ram_)[base_addr + i] = data[i];
      }
    }
  }

  // Queues a DRAM write (timing only; the data is already in RAM).
  void queue_dram_write(uint64_t addr) {
    dram_queue_.push(ramulator::Request(
      addr,
      ramulator::Request::Type::WRITE,
      0
    ));
  }

  // Captures the read data now, records the pending response and queues the
  // DRAM read whose completion callback marks the response as ready.
  void queue_read(uint64_t tag, uint64_t addr) {
    mem_req_ptr mem_req = std::make_shared<mem_req_t>();
    mem_req->tag   = tag;
    mem_req->addr  = addr;
    mem_req->write = false;
    mem_req->ready = false;
    ram_->read(mem_req->block.data(), addr, MEM_BLOCK_SIZE);
    pending_mem_reqs_.emplace_back(mem_req);

    dram_queue_.push(ramulator::Request(
      addr,
      ramulator::Request::Type::READ,
      [mem_req](ramulator::Request&) { mem_req->ready = true; },
      0
    ));
  }

#ifdef AXI_BUS

  void reset_axi_bus() {
    device_->m_axi_wready = 0;
    device_->m_axi_awready = 0;
    device_->m_axi_arready = 0;
    device_->m_axi_rvalid = 0;
    device_->m_axi_bvalid = 0;
  }

  // AXI bus model. With clk low it only latches the response ready signals;
  // with clk high it returns responses, accepts a request and drives the
  // request ready signals.
  void eval_axi_bus(bool clk) {
    if (!clk) {
      mem_rd_rsp_ready_ = device_->m_axi_rready;
      mem_wr_rsp_ready_ = device_->m_axi_bready;
      return;
    }

    if (ram_ == nullptr) {
      device_->m_axi_wready = 0;
      device_->m_axi_awready = 0;
      device_->m_axi_arready = 0;
      return;
    }

    // process memory read responses (only the head of the list is considered)
    if (mem_rd_rsp_active_
     && device_->m_axi_rvalid && mem_rd_rsp_ready_) {
      mem_rd_rsp_active_ = false;
    }
    if (!mem_rd_rsp_active_) {
      if (!pending_mem_reqs_.empty()
       && pending_mem_reqs_.front()->ready
       && !pending_mem_reqs_.front()->write) {
        mem_req_ptr mem_req = pending_mem_reqs_.front();
        pending_mem_reqs_.pop_front();
        device_->m_axi_rvalid = 1;
        device_->m_axi_rid    = mem_req->tag;
        device_->m_axi_rresp  = 0;
        device_->m_axi_rlast  = 1;
        memcpy((uint8_t*)device_->m_axi_rdata, mem_req->block.data(), MEM_BLOCK_SIZE);
        mem_rd_rsp_active_ = true;
      } else {
        device_->m_axi_rvalid = 0;
      }
    }

    // send memory write response
    if (mem_wr_rsp_active_
     && device_->m_axi_bvalid && mem_wr_rsp_ready_) {
      mem_wr_rsp_active_ = false;
    }
    if (!mem_wr_rsp_active_) {
      if (!pending_mem_reqs_.empty()
       && pending_mem_reqs_.front()->ready
       && pending_mem_reqs_.front()->write) {
        mem_req_ptr mem_req = pending_mem_reqs_.front();
        pending_mem_reqs_.pop_front();
        device_->m_axi_bvalid = 1;
        device_->m_axi_bid    = mem_req->tag;
        device_->m_axi_bresp  = 0;
        mem_wr_rsp_active_ = true;
      } else {
        device_->m_axi_bvalid = 0;
      }
    }

    // a request the DRAM model has not accepted yet stalls the bus
    const bool mem_stalled = !dram_queue_.empty();

    // process memory requests
    // NOTE(review): when wvalid and arvalid are both high only the write is
    // served although arready is also raised - confirm the RTL never does this.
    if (!mem_stalled) {
      if (device_->m_axi_wvalid) {
        uint64_t byteen = device_->m_axi_wstrb;
        unsigned base_addr = device_->m_axi_awaddr;
        uint8_t* data = (uint8_t*)(device_->m_axi_wdata);

        // check console output
        if (is_console_addr(base_addr)) {
          this->write_console(byteen, data, MEM_BLOCK_SIZE);
        } else {
          this->write_ram(base_addr, byteen, data);

          // the write response does not wait for the DRAM model
          mem_req_ptr mem_req = std::make_shared<mem_req_t>();
          mem_req->tag   = device_->m_axi_awid;
          mem_req->addr  = device_->m_axi_awaddr;
          mem_req->write = true;
          mem_req->ready = true;
          pending_mem_reqs_.emplace_back(mem_req);

          // send dram request
          this->queue_dram_write(device_->m_axi_awaddr);
        }
      } else if (device_->m_axi_arvalid) {
        // process reads
        this->queue_read(device_->m_axi_arid, device_->m_axi_araddr);
      }
    }

    device_->m_axi_wready  = !mem_stalled;
    device_->m_axi_awready = !mem_stalled;
    device_->m_axi_arready = !mem_stalled;
  }

#else

  void reset_avs_bus() {
    device_->mem_req_ready = 0;
    device_->mem_rsp_valid = 0;
  }

  // mem_req/mem_rsp bus model. With clk low it only latches mem_rsp_ready;
  // with clk high it returns a read response, accepts a request and drives
  // mem_req_ready.
  void eval_avs_bus(bool clk) {
    if (!clk) {
      mem_rd_rsp_ready_ = device_->mem_rsp_ready;
      return;
    }

    if (ram_ == nullptr) {
      device_->mem_req_ready = 0;
      return;
    }

    // process memory responses (only the head of the list is considered)
    if (mem_rd_rsp_active_
     && device_->mem_rsp_valid && mem_rd_rsp_ready_) {
      mem_rd_rsp_active_ = false;
    }
    if (!mem_rd_rsp_active_) {
      if (!pending_mem_reqs_.empty()
       && pending_mem_reqs_.front()->ready) {
        device_->mem_rsp_valid = 1;
        mem_req_ptr mem_req = pending_mem_reqs_.front();
        pending_mem_reqs_.pop_front();
        memcpy((uint8_t*)device_->mem_rsp_data, mem_req->block.data(), MEM_BLOCK_SIZE);
        device_->mem_rsp_tag = mem_req->tag;
        mem_rd_rsp_active_ = true;
      } else {
        device_->mem_rsp_valid = 0;
      }
    }

    // a request the DRAM model has not accepted yet stalls the bus
    const bool mem_stalled = !dram_queue_.empty();

    // process memory requests
    if (!mem_stalled && device_->mem_req_valid) {
      // mem_req_addr is a block index
      uint32_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
      if (device_->mem_req_rw) {
        // process writes
        uint64_t byteen = device_->mem_req_byteen;
        uint8_t* data = (uint8_t*)(device_->mem_req_data);

        // check console output
        if (is_console_addr(byte_addr)) {
          this->write_console(byteen, data, IO_COUT_SIZE);
        } else {
          this->write_ram(byte_addr, byteen, data);

          // send dram request
          this->queue_dram_write(byte_addr);
        }
      } else {
        // process reads
        this->queue_read(device_->mem_req_tag, byte_addr);
      }
    }

    device_->mem_req_ready = !mem_stalled;
  }

#endif

  // Runs the given number of full clock cycles.
  void wait(uint32_t cycles) {
    for (uint32_t i = 0; i < cycles; ++i) {
      this->step();
    }
  }

  // Reads the ebreak signal of the execute stage of cluster 0 / core 0.
  bool get_ebreak() const {
#ifdef AXI_BUS
    return (bool)device_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
#else
    return (bool)device_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
#endif
  }

  // Reads entry `reg` of last_wb_value in the writeback stage of cluster 0 / core 0.
  int get_last_wb_value(int reg) const {
#ifdef AXI_BUS
    return (int)device_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
#else
    return (int)device_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
#endif
  }

private:

#ifdef AXI_BUS
  VVortex_axi *device_;
#else
  VVortex *device_;
#endif
#ifdef VCD_OUTPUT
  VerilatedVcdC *trace_;
#endif

  // console line buffers, keyed by byte lane
  std::unordered_map<int, std::stringstream> print_bufs_;

  // responses owed to the device, in request order
  std::list<mem_req_ptr> pending_mem_reqs_;

  // requests not yet accepted by the DRAM model
  std::queue<ramulator::Request> dram_queue_;

  bool mem_rd_rsp_active_;
  bool mem_rd_rsp_ready_;

  bool mem_wr_rsp_active_;
  bool mem_wr_rsp_ready_;

  RAM *ram_;

  ramulator::Gem5Wrapper* dram_;
};
///////////////////////////////////////////////////////////////////////////////
Processor::Processor() {
  // the implementation object is created eagerly and lives as long as this one
  impl_ = new Impl();
}
Processor::~Processor() {
  // release the implementation object created by the constructor
  delete this->impl_;
}
void Processor::attach_ram(RAM* mem) {
  // forwarded to the implementation, which stores the pointer
  this->impl_->attach_ram(mem);
}
void Processor::reset() {
impl_->reset();
}
int Processor::run() {
return impl_->run();
}

25
sim/rtlsim/processor.h Normal file
View File

@@ -0,0 +1,25 @@
#pragma once
namespace vortex {

class RAM;

// Public handle to the RTL simulation; all work is forwarded to a hidden
// implementation object (pimpl).
class Processor {
public:
  Processor();
  virtual ~Processor();

  // impl_ is owned through a raw pointer and deleted in the destructor,
  // so a copy would delete it twice
  Processor(const Processor&) = delete;
  Processor& operator=(const Processor&) = delete;

  // Sets the memory used to serve the device's bus requests.
  void attach_ram(RAM* ram);

  // Resets the simulated device.
  void reset();

  // Runs the simulation and returns its exit code.
  int run();

private:
  class Impl;
  Impl* impl_;
};

}

View File

@@ -1,579 +0,0 @@
#include "simulator.h"
#include <verilated.h>
#ifdef AXI_BUS
#include "VVortex_axi.h"
#include "VVortex_axi__Syms.h"
#else
#include "VVortex.h"
#include "VVortex__Syms.h"
#endif
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#endif
#include <iostream>
#include <fstream>
#include <iomanip>
#include <mem.h>
#define ENABLE_MEM_STALLS
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
#ifndef MEM_LATENCY
#define MEM_LATENCY 24
#endif
#ifndef MEM_RQ_SIZE
#define MEM_RQ_SIZE 16
#endif
#ifndef MEM_STALLS_MODULO
#define MEM_STALLS_MODULO 16
#endif
#ifndef VERILATOR_RESET_VALUE
#define VERILATOR_RESET_VALUE 2
#endif
#define VL_WDATA_GETW(lwp, i, n, w) \
VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w)
using namespace vortex;
static uint64_t timestamp = 0;
double sc_time_stamp() {
  // the global tick counter doubles as the simulation time
  return static_cast<double>(timestamp);
}
///////////////////////////////////////////////////////////////////////////////
static bool trace_enabled = false;
static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;
bool sim_trace_enabled() {
  // before the window opens or once it has closed, only the manual switch counts
  if (timestamp < trace_start_time || timestamp >= trace_stop_time) {
    return trace_enabled;
  }
  return true;
}
void sim_trace_enable(bool enable) {
trace_enabled = enable;
}
///////////////////////////////////////////////////////////////////////////////
namespace vortex {
class VL_OBJ {
public:
#ifdef AXI_BUS
VVortex_axi *device;
#else
VVortex *device;
#endif
#ifdef VCD_OUTPUT
VerilatedVcdC *trace;
#endif
VL_OBJ() {
// force random values for unitialized signals
Verilated::randReset(VERILATOR_RESET_VALUE);
Verilated::randSeed(50);
// Turn off assertion before reset
Verilated::assertOn(false);
#ifdef AXI_BUS
this->device = new VVortex_axi();
#else
this->device = new VVortex();
#endif
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
this->trace = new VerilatedVcdC();
this->device->trace(this->trace, 99);
this->trace->open("trace.vcd");
#endif
}
~VL_OBJ() {
#ifdef VCD_OUTPUT
this->trace->close();
delete this->trace;
#endif
delete this->device;
}
};
}
///////////////////////////////////////////////////////////////////////////////
Simulator::Simulator()
  : ram_(nullptr)
  , vl_obj_(new VL_OBJ())
{
  // bring the freshly created device into a known state
  this->reset();
}
Simulator::~Simulator() {
  // print console lines that never received their terminating newline
  for (auto it = print_bufs_.begin(); it != print_bufs_.end(); ++it) {
    const std::string leftover = it->second.str();
    if (leftover.empty())
      continue;
    std::cout << "#" << it->first << ": " << leftover << std::endl;
  }
  delete vl_obj_;
}
void Simulator::attach_ram(RAM* ram) {
  ram_ = ram;
  // responses scheduled before the attachment are discarded
  for (auto& bank_queue : mem_rsp_vec_) {
    bank_queue.clear();
  }
  last_mem_rsp_bank_ = 0;
}
void Simulator::reset() {
  auto* dev = vl_obj_->device;

  // drop buffered console output and scheduled memory responses
  print_bufs_.clear();
  for (auto& bank_queue : mem_rsp_vec_) {
    bank_queue.clear();
  }
  last_mem_rsp_bank_ = 0;

  mem_rd_rsp_active_ = false;
  mem_wr_rsp_active_ = false;

#ifdef AXI_BUS
  this->reset_axi_bus();
#else
  this->reset_mem_bus();
#endif

  // hold reset high for RESET_DELAY full clock cycles
  dev->reset = 1;
  int cycle = 0;
  while (cycle < RESET_DELAY) {
    dev->clk = 0;
    this->eval();
    dev->clk = 1;
    this->eval();
    ++cycle;
  }
  dev->reset = 0;

  // Turn on assertion after reset
  Verilated::assertOn(true);
}
void Simulator::step() {
  auto* dev = vl_obj_->device;

  // low clock phase
  dev->clk = 0;
  this->eval();
#ifdef AXI_BUS
  this->eval_axi_bus(false);
#else
  this->eval_mem_bus(false);
#endif

  // high clock phase
  dev->clk = 1;
  this->eval();
#ifdef AXI_BUS
  this->eval_axi_bus(true);
#else
  this->eval_mem_bus(true);
#endif

#ifndef NDEBUG
  fflush(stdout);
#endif
}
void Simulator::eval() {
  // evaluate the model, optionally dump the trace, then advance the timestamp
  vl_obj_->device->eval();
#ifdef VCD_OUTPUT
  if (sim_trace_enabled())
    vl_obj_->trace->dump(timestamp);
#endif
  timestamp += 1;
}
#ifdef AXI_BUS
void Simulator::reset_axi_bus() {
  auto* dev = vl_obj_->device;
  // request channels: not ready
  dev->m_axi_wready  = 0;
  dev->m_axi_awready = 0;
  dev->m_axi_arready = 0;
  // response channels: nothing valid
  dev->m_axi_rvalid  = 0;
  dev->m_axi_bvalid  = 0;
}
// AXI bus model. With clk low it only latches the response ready signals;
// with clk high it ages the scheduled responses, returns at most one read
// and one write response, accepts a request unless stalled and drives the
// request ready signals.
void Simulator::eval_axi_bus(bool clk) {
  auto* dev = vl_obj_->device;

  if (!clk) {
    mem_rd_rsp_ready_ = dev->m_axi_rready;
    mem_wr_rsp_ready_ = dev->m_axi_bready;
    return;
  }

  if (ram_ == nullptr) {
    dev->m_axi_wready = 0;
    dev->m_axi_awready = 0;
    dev->m_axi_arready = 0;
    return;
  }

  // update memory responses schedule
  for (int b = 0; b < MEMORY_BANKS; ++b) {
    for (auto& rsp : mem_rsp_vec_[b]) {
      if (rsp.cycles_left > 0)
        rsp.cycles_left -= 1;
    }
  }

  bool has_rd_response = false;
  bool has_wr_response = false;

  // schedule memory responses that are ready, starting at the bank after
  // the one served last
  for (int i = 0; i < MEMORY_BANKS; ++i) {
    uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS;
    if (!mem_rsp_vec_[b].empty()) {
      auto mem_rsp_it = mem_rsp_vec_[b].begin();
      if (mem_rsp_it->cycles_left <= 0) {
        has_rd_response = !mem_rsp_it->write;
        has_wr_response = mem_rsp_it->write;
        last_mem_rsp_bank_ = b;
        break;
      }
    }
  }

  // send memory read response
  if (mem_rd_rsp_active_
   && dev->m_axi_rvalid && mem_rd_rsp_ready_) {
    mem_rd_rsp_active_ = false;
  }
  if (!mem_rd_rsp_active_) {
    if (has_rd_response) {
      auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
      dev->m_axi_rvalid = 1;
      dev->m_axi_rid    = mem_rsp_it->tag;
      dev->m_axi_rresp  = 0;
      dev->m_axi_rlast  = 1;
      memcpy((uint8_t*)dev->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
      mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
      mem_rd_rsp_active_ = true;
    } else {
      dev->m_axi_rvalid = 0;
    }
  }

  // send memory write response
  if (mem_wr_rsp_active_
   && dev->m_axi_bvalid && mem_wr_rsp_ready_) {
    mem_wr_rsp_active_ = false;
  }
  if (!mem_wr_rsp_active_) {
    if (has_wr_response) {
      auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
      dev->m_axi_bvalid = 1;
      dev->m_axi_bid    = mem_rsp_it->tag;
      dev->m_axi_bresp  = 0;
      mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
      mem_wr_rsp_active_ = true;
    } else {
      dev->m_axi_bvalid = 0;
    }
  }

  // select the memory bank
  uint32_t req_addr = dev->m_axi_wvalid ? dev->m_axi_awaddr : dev->m_axi_araddr;
  uint32_t req_bank = (MEMORY_BANKS >= 2) ? ((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0;

  // handle memory stalls
  bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
  if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
    // periodic stall, one cycle out of every MEM_STALLS_MODULO
    mem_stalled = true;
  } else
  if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) {
    // the target bank's queue is full
    mem_stalled = true;
  }
#endif

  // process memory requests
  if (!mem_stalled) {
    if (dev->m_axi_wvalid) {
      uint64_t byteen = dev->m_axi_wstrb;
      unsigned base_addr = dev->m_axi_awaddr;
      uint8_t* data = (uint8_t*)(dev->m_axi_wdata);

      // detect stdout write
      if (base_addr >= IO_COUT_ADDR
       && base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
        for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
          if ((byteen >> i) & 0x1) {
            auto& ss_buf = print_bufs_[i];
            char c = data[i];
            ss_buf << c;
            if (c == '\n') {
              std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
              ss_buf.str("");
            }
          }
        }
      } else {
        for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
          if ((byteen >> i) & 0x1) {
            (*ram_)[base_addr + i] = data[i];
          }
        }
        // the write response must carry the id/address of the write address
        // channel (aw*), not of the read address channel
        mem_req_t mem_req;
        mem_req.tag  = dev->m_axi_awid;
        mem_req.addr = dev->m_axi_awaddr;
        mem_req.cycles_left = 0;
        mem_req.write = 1;
        mem_rsp_vec_[req_bank].emplace_back(mem_req);
      }
    } else if (dev->m_axi_arvalid) {
      mem_req_t mem_req;
      mem_req.tag  = dev->m_axi_arid;
      mem_req.addr = dev->m_axi_araddr;
      ram_->read(mem_req.block.data(), dev->m_axi_araddr, MEM_BLOCK_SIZE);
      mem_req.cycles_left = MEM_LATENCY;
      mem_req.write = 0;
      for (auto& rsp : mem_rsp_vec_[req_bank]) {
        if (mem_req.addr == rsp.addr) {
          // duplicate requests receive the same cycle delay
          mem_req.cycles_left = rsp.cycles_left;
          break;
        }
      }
      mem_rsp_vec_[req_bank].emplace_back(mem_req);
    }
  }

  dev->m_axi_wready  = !mem_stalled;
  dev->m_axi_awready = !mem_stalled;
  dev->m_axi_arready = !mem_stalled;
}
#else
void Simulator::reset_mem_bus() {
  auto* dev = vl_obj_->device;
  // no request accepted, no response offered
  dev->mem_req_ready = 0;
  dev->mem_rsp_valid = 0;
}
// mem_req/mem_rsp bus model. With clk low it only latches mem_rsp_ready;
// with clk high it ages the scheduled responses, returns at most one read
// response, accepts a request unless stalled and drives mem_req_ready.
void Simulator::eval_mem_bus(bool clk) {
if (!clk) {
// low phase: remember whether the device can take a response
mem_rd_rsp_ready_ = vl_obj_->device->mem_rsp_ready;
return;
}
if (ram_ == nullptr) {
// no memory attached: refuse requests
vl_obj_->device->mem_req_ready = 0;
return;
}
// update memory responses schedule
for (int b = 0; b < MEMORY_BANKS; ++b) {
for (auto& rsp : mem_rsp_vec_[b]) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
}
bool has_response = false;
// schedule memory responses that are ready
// (banks are scanned starting after the one served last; only the head of
// each bank queue is considered)
for (int i = 0; i < MEMORY_BANKS; ++i) {
uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS;
if (!mem_rsp_vec_[b].empty()
&& (mem_rsp_vec_[b].begin()->cycles_left) <= 0) {
has_response = true;
last_mem_rsp_bank_ = b;
break;
}
}
// send memory response
// (the previous response is released once valid and the latched ready were both high)
if (mem_rd_rsp_active_
&& vl_obj_->device->mem_rsp_valid && mem_rd_rsp_ready_) {
mem_rd_rsp_active_ = false;
}
if (!mem_rd_rsp_active_) {
if (has_response) {
vl_obj_->device->mem_rsp_valid = 1;
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
/*
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");
*/
memcpy((uint8_t*)vl_obj_->device->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
vl_obj_->device->mem_rsp_tag = mem_rsp_it->tag;
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
mem_rd_rsp_active_ = true;
} else {
vl_obj_->device->mem_rsp_valid = 0;
}
}
// select the memory bank
// (mem_req_addr is a block index, so the bank is taken from it directly)
uint32_t req_bank = (MEMORY_BANKS >= 2) ? (vl_obj_->device->mem_req_addr % MEMORY_BANKS) : 0;
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
// stall one cycle out of every MEM_STALLS_MODULO, or while the target bank queue is full
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process memory requests
if (!mem_stalled) {
if (vl_obj_->device->mem_req_valid) {
if (vl_obj_->device->mem_req_rw) {
// write request: no response is scheduled for it
uint64_t byteen = vl_obj_->device->mem_req_byteen;
unsigned base_addr = (vl_obj_->device->mem_req_addr * MEM_BLOCK_SIZE);
uint8_t* data = (uint8_t*)(vl_obj_->device->mem_req_data);
// writes into the console window are buffered per byte lane and
// printed when a newline arrives
if (base_addr >= IO_COUT_ADDR
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
// NOTE(review): this loop is bounded by IO_COUT_SIZE while byteen is
// 64 bits wide - assumes IO_COUT_SIZE <= 64, confirm
for (int i = 0; i < IO_COUT_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
}
}
} else {
/*
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");
*/
// store only the byte-enabled lanes
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
}
}
}
} else {
// read request: capture the data now, deliver it after MEM_LATENCY cycles
mem_req_t mem_req;
mem_req.tag = vl_obj_->device->mem_req_tag;
mem_req.addr = (vl_obj_->device->mem_req_addr * MEM_BLOCK_SIZE);
ram_->read(mem_req.block.data(), vl_obj_->device->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE);
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_rsp_vec_[req_bank]) {
if (mem_req.addr == rsp.addr) {
// duplicate requests receive the same cycle delay
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_rsp_vec_[req_bank].emplace_back(mem_req);
}
}
}
vl_obj_->device->mem_req_ready = !mem_stalled;
}
#endif
// Runs the given number of full clock cycles.
void Simulator::wait(uint32_t cycles) {
  // the counter has the parameter's type: no signed/unsigned comparison
  for (uint32_t i = 0; i < cycles; ++i) {
    this->step();
  }
}
bool Simulator::is_busy() const {
  // mirrors the device's busy output
  const auto* dev = vl_obj_->device;
  return dev->busy;
}
int Simulator::run() {
#ifndef NDEBUG
  std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
#endif

  int exitcode = 0;

  // execute program: stop when the device goes idle or raises ebreak
  for (;;) {
    if (!vl_obj_->device->busy)
      break;
    if (this->get_ebreak()) {
      exitcode = this->get_last_wb_value(3);
      break;
    }
    this->step();
  }

  // wait 5 cycles to flush the pipeline
  this->wait(5);

  return exitcode;
}
bool Simulator::get_ebreak() const {
  // ebreak signal of the execute stage of cluster 0 / core 0
#ifdef AXI_BUS
  const int flag = (int)vl_obj_->device->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
#else
  const int flag = (int)vl_obj_->device->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
#endif
  return flag != 0;
}
int Simulator::get_last_wb_value(int reg) const {
  // entry `reg` of last_wb_value in the writeback stage of cluster 0 / core 0
#ifdef AXI_BUS
  const int value = (int)vl_obj_->device->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
#else
  const int value = (int)vl_obj_->device->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
#endif
  return value;
}
void Simulator::print_stats(std::ostream& out) {
  // the timestamp advances twice per clock cycle (once per eval)
  const uint64_t total_cycles = timestamp / 2;
  out << std::left
      << std::setw(24) << "# of total cycles:" << std::dec << total_cycles << std::endl;
}

View File

@@ -1,81 +0,0 @@
#pragma once
#include <VX_config.h>
#include <ostream>
#include <list>
#include <vector>
#include <sstream>
#include <unordered_map>
#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
#define MEMORY_BANKS 2
#endif
#endif
namespace vortex {

class VL_OBJ;
class RAM;

// Drives the Verilated device clock by clock and serves its memory bus from
// an attached RAM with a fixed-latency, banked response model.
class Simulator {
public:

  Simulator();
  virtual ~Simulator();

  // vl_obj_ is owned through a raw pointer and deleted in the destructor,
  // so a copy would delete it twice
  Simulator(const Simulator&) = delete;
  Simulator& operator=(const Simulator&) = delete;

  // Sets the memory used to serve bus requests; not owned.
  void attach_ram(RAM* ram);

  // Reports the device's busy output.
  bool is_busy() const;

  // Clears simulator-side state and runs the device reset sequence.
  void reset();

  // Runs one full clock cycle.
  void step();

  // Runs `cycles` full clock cycles.
  void wait(uint32_t cycles);

  // Runs until the device is idle or raises ebreak; returns the exit code.
  int run();

  // Writes the total cycle count to `out`.
  void print_stats(std::ostream& out);

private:

  // One scheduled memory response.
  typedef struct {
    int cycles_left;                            // cycles until it may be returned
    std::array<uint8_t, MEM_BLOCK_SIZE> block;  // read data
    uint64_t addr;
    uint64_t tag;
    bool write;
  } mem_req_t;

  // console line buffers, keyed by byte lane
  std::unordered_map<int, std::stringstream> print_bufs_;

  void eval();

#ifdef AXI_BUS
  void reset_axi_bus();
  void eval_axi_bus(bool clk);
#else
  void reset_mem_bus();
  void eval_mem_bus(bool clk);
#endif

  int get_last_wb_value(int reg) const;

  bool get_ebreak() const;

  // per-bank response queues and the bank served last
  std::list<mem_req_t> mem_rsp_vec_ [MEMORY_BANKS];
  uint32_t last_mem_rsp_bank_;

  bool mem_rd_rsp_active_;
  bool mem_rd_rsp_ready_;

  bool mem_wr_rsp_active_;
  bool mem_wr_rsp_ready_;

  RAM *ram_;
  VL_OBJ* vl_obj_;
};

}