using ramulator dram simulator
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../hw/rtl
|
||||
THIRD_PARTY_DIR = ../../third_party
|
||||
|
||||
@@ -5,15 +6,17 @@ CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I. -I../common -I../../hw
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/cocogfx/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a -L$(THIRD_PARTY_DIR)/cocogfx -lcocogfx
|
||||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/cocogfx -lcocogfx
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
TOP = vx_cache_sim
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp tex_unit.cpp processor.cpp main.cpp
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp tex_unit.cpp processor.cpp
|
||||
|
||||
OBJS := $(patsubst %.cpp, obj_dir/%.o, $(notdir $(SRCS)))
|
||||
VPATH := $(sort $(dir $(SRCS)))
|
||||
@@ -30,23 +33,16 @@ endif
|
||||
|
||||
PROJECT = simx
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
obj_dir/%.o: %.cpp
|
||||
mkdir -p obj_dir
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
static: $(OBJS)
|
||||
$(AR) rcs lib$(PROJECT).a $(OBJS) $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/*.o
|
||||
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean-static:
|
||||
rm -rf lib$(PROJECT).a obj_dir .depend
|
||||
|
||||
clean: clean-static
|
||||
rm -rf $(PROJECT)
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so
|
||||
@@ -116,6 +116,7 @@ struct bank_req_t {
|
||||
bool mshr_replay;
|
||||
uint64_t tag;
|
||||
uint32_t set_id;
|
||||
uint32_t core_id;
|
||||
std::vector<bank_req_info_t> infos;
|
||||
|
||||
bank_req_t(uint32_t size)
|
||||
@@ -124,6 +125,7 @@ struct bank_req_t {
|
||||
, mshr_replay(false)
|
||||
, tag(0)
|
||||
, set_id(0)
|
||||
, core_id(0)
|
||||
, infos(size)
|
||||
{}
|
||||
};
|
||||
@@ -292,7 +294,7 @@ public:
|
||||
auto& mem_rsp = bypass_port.front();
|
||||
uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
|
||||
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
|
||||
MemRsp core_rsp(tag);
|
||||
MemRsp core_rsp{tag, mem_rsp.core_id};
|
||||
simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
|
||||
bypass_port.pop();
|
||||
}
|
||||
@@ -327,7 +329,7 @@ public:
|
||||
auto& core_req = core_req_port.front();
|
||||
|
||||
// check cache bypassing
|
||||
if (core_req.is_io) {
|
||||
if (core_req.non_cacheable) {
|
||||
// send IO request
|
||||
this->processIORequest(core_req, req_id);
|
||||
|
||||
@@ -348,6 +350,7 @@ public:
|
||||
bank_req.mshr_replay = false;
|
||||
bank_req.tag = tag;
|
||||
bank_req.set_id = set_id;
|
||||
bank_req.core_id = core_req.core_id;
|
||||
bank_req.infos.at(port_id) = {true, req_id, core_req.tag};
|
||||
|
||||
auto& bank = banks_.at(bank_id);
|
||||
@@ -439,7 +442,8 @@ public:
|
||||
if (pipeline_req.mshr_replay) {
|
||||
// send core response
|
||||
for (auto& info : pipeline_req.infos) {
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(MemRsp{info.req_tag}, config_.latency);
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.core_id};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
}
|
||||
} else {
|
||||
bool hit = false;
|
||||
@@ -480,6 +484,7 @@ public:
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, hit_block.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.core_id = pipeline_req.core_id;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
} else {
|
||||
// mark block as dirty
|
||||
@@ -488,8 +493,9 @@ public:
|
||||
}
|
||||
// send core response
|
||||
if (!pipeline_req.write || config_.write_reponse) {
|
||||
for (auto& info : pipeline_req.infos) {
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(MemRsp{info.req_tag}, config_.latency);
|
||||
for (auto& info : pipeline_req.infos) {
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.core_id};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -508,6 +514,7 @@ public:
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_block.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.core_id = pipeline_req.core_id;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
++perf_stats_.evictions;
|
||||
}
|
||||
@@ -519,12 +526,14 @@ public:
|
||||
MemReq mem_req;
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
|
||||
mem_req.write = true;
|
||||
mem_req.core_id = pipeline_req.core_id;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
}
|
||||
// send core response
|
||||
if (config_.write_reponse) {
|
||||
for (auto& info : pipeline_req.infos) {
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(MemRsp{info.req_tag}, config_.latency);
|
||||
for (auto& info : pipeline_req.infos) {
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.core_id};
|
||||
simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -540,6 +549,7 @@ public:
|
||||
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
|
||||
mem_req.write = false;
|
||||
mem_req.tag = mshr_id;
|
||||
mem_req.core_id = pipeline_req.core_id;
|
||||
mem_req_ports_.at(bank_id).send(mem_req, 1);
|
||||
++pending_fill_reqs_;
|
||||
}
|
||||
|
||||
@@ -2,12 +2,10 @@
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#ifndef MEM_LATENCY
|
||||
#define MEM_LATENCY 24
|
||||
#endif
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
#define DRAM_CHANNELS 2
|
||||
|
||||
namespace vortex {
|
||||
|
||||
enum Constants {
|
||||
|
||||
@@ -87,12 +87,12 @@ Core::Core(const SimContext& ctx, const ArchDef &arch, Word id)
|
||||
}
|
||||
|
||||
// register execute units
|
||||
exe_units_.at((int)ExeType::NOP) = SimPlatform::instance().CreateObject<NopUnit>(this);
|
||||
exe_units_.at((int)ExeType::ALU) = SimPlatform::instance().CreateObject<AluUnit>(this);
|
||||
exe_units_.at((int)ExeType::LSU) = SimPlatform::instance().CreateObject<LsuUnit>(this);
|
||||
exe_units_.at((int)ExeType::CSR) = SimPlatform::instance().CreateObject<CsrUnit>(this);
|
||||
exe_units_.at((int)ExeType::FPU) = SimPlatform::instance().CreateObject<FpuUnit>(this);
|
||||
exe_units_.at((int)ExeType::GPU) = SimPlatform::instance().CreateObject<GpuUnit>(this);
|
||||
exe_units_.at((int)ExeType::NOP) = SimPlatform::instance().create_object<NopUnit>(this);
|
||||
exe_units_.at((int)ExeType::ALU) = SimPlatform::instance().create_object<AluUnit>(this);
|
||||
exe_units_.at((int)ExeType::LSU) = SimPlatform::instance().create_object<LsuUnit>(this);
|
||||
exe_units_.at((int)ExeType::CSR) = SimPlatform::instance().create_object<CsrUnit>(this);
|
||||
exe_units_.at((int)ExeType::FPU) = SimPlatform::instance().create_object<FpuUnit>(this);
|
||||
exe_units_.at((int)ExeType::GPU) = SimPlatform::instance().create_object<GpuUnit>(this);
|
||||
|
||||
// connect l1 switch
|
||||
icache_->MemReqPort.bind(&l1_mem_switch_->ReqIn[0]);
|
||||
@@ -216,6 +216,7 @@ void Core::fetch(uint64_t cycle) {
|
||||
mem_req.addr = trace->PC;
|
||||
mem_req.write = false;
|
||||
mem_req.tag = pending_icache_.allocate(trace);
|
||||
mem_req.core_id = id_;
|
||||
icache_->CoreReqPorts.at(0).send(mem_req, 1);
|
||||
DT(3, cycle, "icache-req: addr=" << std::hex << mem_req.addr << ", tag=" << mem_req.tag << ", " << *trace);
|
||||
fetch_latch_.pop();
|
||||
|
||||
@@ -403,7 +403,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
break;
|
||||
case JALR_INST:
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->alu.type = AluType::BRANCH;
|
||||
trace->alu.type = AluType::BRANCH;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
for (int t = 0; t < num_threads; ++t) {
|
||||
if (!tmask_.test(t))
|
||||
@@ -535,6 +535,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
Word csr_value;
|
||||
if (func3 == 0) {
|
||||
trace->exe_type = ExeType::ALU;
|
||||
trace->alu.type = AluType::SYSCALL;
|
||||
trace->fetch_stall = true;
|
||||
switch (csr_addr) {
|
||||
case 0: // ECALL
|
||||
|
||||
@@ -143,8 +143,9 @@ void LsuUnit::step(uint64_t cycle) {
|
||||
MemReq mem_req;
|
||||
mem_req.addr = mem_addr.addr;
|
||||
mem_req.write = is_write;
|
||||
mem_req.non_cacheable = (type == AddrType::IO);
|
||||
mem_req.tag = tag;
|
||||
mem_req.is_io = (type == AddrType::IO);
|
||||
mem_req.core_id = core_->id();
|
||||
|
||||
if (type == AddrType::Shared) {
|
||||
core_->shared_mem_->Inputs.at(t).send(mem_req, 2);
|
||||
@@ -153,7 +154,7 @@ void LsuUnit::step(uint64_t cycle) {
|
||||
} else {
|
||||
dcache_req_port.send(mem_req, 2);
|
||||
DT(3, cycle, "dcache-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag
|
||||
<< ", type=" << trace->lsu.type << ", tid=" << t << ", io=" << mem_req.is_io << ", " << *trace);
|
||||
<< ", type=" << trace->lsu.type << ", tid=" << t << ", nc=" << mem_req.non_cacheable << ", " << *trace);
|
||||
}
|
||||
|
||||
if (is_dup)
|
||||
@@ -182,6 +183,7 @@ void AluUnit::step(uint64_t cycle) {
|
||||
switch (trace->alu.type) {
|
||||
case AluType::ARITH:
|
||||
case AluType::BRANCH:
|
||||
case AluType::SYSCALL:
|
||||
case AluType::CMOV:
|
||||
Output.send(trace, 1);
|
||||
break;
|
||||
@@ -359,6 +361,7 @@ bool GpuUnit::processTexRequest(uint64_t cycle, pipeline_trace_t* trace) {
|
||||
mem_req.addr = mem_addr.addr;
|
||||
mem_req.write = (trace->lsu.type == LsuType::STORE);
|
||||
mem_req.tag = tag;
|
||||
mem_req.core_id = core_->id();
|
||||
dcache_req_port.send(mem_req, 3);
|
||||
DT(3, cycle, "tex-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag
|
||||
<< ", tid=" << t << ", "<< trace);
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
using namespace vortex;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int exitcode;
|
||||
int exitcode = 0;
|
||||
|
||||
std::string archStr("rv32imf");
|
||||
std::string imgFileName;
|
||||
@@ -54,12 +54,7 @@ int main(int argc, char **argv) {
|
||||
return -1;
|
||||
|
||||
{
|
||||
ArchDef arch(archStr, num_cores, num_warps, num_threads);
|
||||
|
||||
Processor processor(arch);
|
||||
|
||||
RAM ram(RAM_PAGE_SIZE);
|
||||
|
||||
{
|
||||
std::string program_ext(fileExtension(imgFileName.c_str()));
|
||||
if (program_ext == "bin") {
|
||||
@@ -72,25 +67,40 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
}
|
||||
|
||||
processor.attach_ram(&ram);
|
||||
ArchDef arch(archStr, num_cores, num_warps, num_threads);
|
||||
auto processor = Processor::Create(arch);
|
||||
processor->attach_ram(&ram);
|
||||
|
||||
exitcode = processor.run();
|
||||
// setup memory simulator
|
||||
auto memsim = MemSim::Create(MemSim::Config{
|
||||
DRAM_CHANNELS,
|
||||
arch.num_cores()
|
||||
});
|
||||
processor->MemReqPort.bind(&memsim->MemReqPort);
|
||||
memsim->MemRspPort.bind(&processor->MemRspPort);
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed." << std::endl;
|
||||
exitcode = 0;
|
||||
} else {
|
||||
std::cout << "Failed." << std::endl;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
// run simulation
|
||||
for (;;) {
|
||||
SimPlatform::instance().step();
|
||||
if (processor->check_exit(&exitcode))
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
SimPlatform::instance().finalize();
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed." << std::endl;
|
||||
exitcode = 0;
|
||||
} else {
|
||||
std::cout << "Failed." << std::endl;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
@@ -1,56 +1,99 @@
|
||||
#include "memsim.h"
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <stdlib.h>
|
||||
|
||||
DISABLE_WARNING_PUSH
|
||||
DISABLE_WARNING_UNUSED_PARAMETER
|
||||
#define RAMULATOR
|
||||
#include <ramulator/src/Gem5Wrapper.h>
|
||||
#include <ramulator/src/Request.h>
|
||||
#include <ramulator/src/Statistics.h>
|
||||
DISABLE_WARNING_POP
|
||||
|
||||
#include "constants.h"
|
||||
#include "types.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
class MemSim::Impl {
|
||||
private:
|
||||
MemSim* simobject_;
|
||||
uint32_t num_banks_;
|
||||
uint32_t latency_;
|
||||
Config config_;
|
||||
PerfStats perf_stats_;
|
||||
ramulator::Gem5Wrapper* dram_;
|
||||
|
||||
public:
|
||||
Impl(MemSim* simobject, uint32_t num_banks, uint32_t latency)
|
||||
|
||||
Impl(MemSim* simobject, const Config& config)
|
||||
: simobject_(simobject)
|
||||
, num_banks_(num_banks)
|
||||
, latency_(latency)
|
||||
{}
|
||||
, config_(config)
|
||||
{
|
||||
ramulator::Config ram_config;
|
||||
ram_config.add("standard", "DDR4");
|
||||
ram_config.add("channels", std::to_string(config.channels));
|
||||
ram_config.add("ranks", "1");
|
||||
ram_config.add("speed", "DDR4_2400R");
|
||||
ram_config.add("org", "DDR4_4Gb_x8");
|
||||
ram_config.add("mapping", "defaultmapping");
|
||||
ram_config.set_core_num(config.num_cores);
|
||||
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
dram_->finish();
|
||||
Stats::statlist.printall();
|
||||
delete dram_;
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
void dram_callback(ramulator::Request& req, uint32_t tag) {
|
||||
MemRsp mem_rsp{tag, (uint32_t)req.coreid};
|
||||
simobject_->MemRspPort.send(mem_rsp, 1);
|
||||
}
|
||||
|
||||
void step(uint64_t /*cycle*/) {
|
||||
for (uint32_t i = 0, n = num_banks_; i < n; ++i) {
|
||||
auto& mem_req_port = simobject_->MemReqPorts.at(i);
|
||||
if (mem_req_port.empty())
|
||||
continue;
|
||||
auto& mem_req = mem_req_port.front();
|
||||
if (!mem_req.write) {
|
||||
MemRsp mem_rsp;
|
||||
mem_rsp.tag = mem_req.tag;
|
||||
simobject_->MemRspPorts.at(i).send(mem_rsp, latency_);
|
||||
++perf_stats_.reads;
|
||||
} else {
|
||||
++perf_stats_.writes;
|
||||
}
|
||||
mem_req_port.pop();
|
||||
dram_->tick();
|
||||
|
||||
if (simobject_->MemReqPort.empty())
|
||||
return;
|
||||
|
||||
auto& mem_req = simobject_->MemReqPort.front();
|
||||
|
||||
if (mem_req.write) {
|
||||
ramulator::Request dram_req(
|
||||
mem_req.addr,
|
||||
ramulator::Request::Type::WRITE,
|
||||
mem_req.core_id
|
||||
);
|
||||
dram_->send(dram_req);
|
||||
++perf_stats_.writes;
|
||||
} else {
|
||||
ramulator::Request dram_req(
|
||||
mem_req.addr,
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag),
|
||||
mem_req.core_id
|
||||
);
|
||||
dram_->send(dram_req);
|
||||
++perf_stats_.reads;
|
||||
}
|
||||
|
||||
simobject_->MemReqPort.pop();
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
MemSim::MemSim(const SimContext& ctx,
|
||||
uint32_t num_banks,
|
||||
uint32_t latency)
|
||||
MemSim::MemSim(const SimContext& ctx, const Config& config)
|
||||
: SimObject<MemSim>(ctx, "MemSim")
|
||||
, MemReqPorts(num_banks, this)
|
||||
, MemRspPorts(num_banks, this)
|
||||
, impl_(new Impl(this, num_banks, latency))
|
||||
, MemReqPort(this)
|
||||
, MemRspPort(this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
MemSim::~MemSim() {
|
||||
|
||||
@@ -8,6 +8,11 @@ namespace vortex {
|
||||
|
||||
class MemSim : public SimObject<MemSim>{
|
||||
public:
|
||||
struct Config {
|
||||
uint32_t channels;
|
||||
uint32_t num_cores;
|
||||
};
|
||||
|
||||
struct PerfStats {
|
||||
uint64_t reads;
|
||||
uint64_t writes;
|
||||
@@ -18,10 +23,10 @@ public:
|
||||
{}
|
||||
};
|
||||
|
||||
std::vector<SimPort<MemReq>> MemReqPorts;
|
||||
std::vector<SimPort<MemRsp>> MemRspPorts;
|
||||
SimPort<MemReq> MemReqPort;
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
|
||||
MemSim(const SimContext& ctx, uint32_t num_banks, uint32_t latency);
|
||||
MemSim(const SimContext& ctx, const Config& config);
|
||||
~MemSim();
|
||||
|
||||
void step(uint64_t cycle);
|
||||
|
||||
@@ -3,147 +3,173 @@
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
Processor::Processor(const ArchDef& arch)
|
||||
: cores_(arch.num_cores())
|
||||
, l2caches_(NUM_CLUSTERS)
|
||||
, l2_mem_switches_(NUM_CLUSTERS)
|
||||
{
|
||||
uint32_t num_cores = arch.num_cores();
|
||||
uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS;
|
||||
class Processor::Impl {
|
||||
private:
|
||||
Processor* simobject_;
|
||||
std::vector<Core::Ptr> cores_;
|
||||
std::vector<Cache::Ptr> l2caches_;
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> l2_mem_switches_;
|
||||
Cache::Ptr l3cache_;
|
||||
Switch<MemReq, MemRsp>::Ptr l3_mem_switch_;
|
||||
|
||||
// create cores
|
||||
for (uint32_t i = 0; i < num_cores; ++i) {
|
||||
cores_.at(i) = Core::Create(arch, i);
|
||||
}
|
||||
public:
|
||||
Impl(Processor* simobject, const ArchDef& arch)
|
||||
: simobject_(simobject)
|
||||
, cores_(arch.num_cores())
|
||||
, l2caches_(NUM_CLUSTERS)
|
||||
, l2_mem_switches_(NUM_CLUSTERS)
|
||||
{
|
||||
uint32_t num_cores = arch.num_cores();
|
||||
uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS;
|
||||
|
||||
// connect memory sub-systen
|
||||
memsim_ = MemSim::Create(1, MEM_LATENCY);
|
||||
std::vector<SimPort<MemReq>*> mem_req_ports(1);
|
||||
std::vector<SimPort<MemRsp>*> mem_rsp_ports(1);
|
||||
|
||||
mem_req_ports.at(0) = &memsim_->MemReqPorts.at(0);
|
||||
mem_rsp_ports.at(0) = &memsim_->MemRspPorts.at(0);
|
||||
|
||||
if (L3_ENABLE) {
|
||||
l3cache_ = Cache::Create("l3cache", Cache::Config{
|
||||
log2ceil(L3_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
L3_NUM_BANKS, // number of banks
|
||||
L3_NUM_PORTS, // number of ports
|
||||
NUM_CLUSTERS, // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L3_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
}
|
||||
);
|
||||
|
||||
mem_rsp_ports.at(0)->bind(&l3cache_->MemRspPort);
|
||||
l3cache_->MemReqPort.bind(mem_req_ports.at(0));
|
||||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i);
|
||||
mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i);
|
||||
// create cores
|
||||
for (uint32_t i = 0; i < num_cores; ++i) {
|
||||
cores_.at(i) = Core::Create(arch, i);
|
||||
}
|
||||
} else if (NUM_CLUSTERS > 1) {
|
||||
l3_mem_switch_ = Switch<MemReq, MemRsp>::Create("l3_arb", ArbiterType::RoundRobin, NUM_CLUSTERS);
|
||||
mem_rsp_ports.at(0)->bind(&l3_mem_switch_->RspIn);
|
||||
l3_mem_switch_->ReqOut.bind(mem_req_ports.at(0));
|
||||
|
||||
std::vector<SimPort<MemReq>*> mem_req_ports(1);
|
||||
std::vector<SimPort<MemRsp>*> mem_rsp_ports(1);
|
||||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
mem_req_ports.at(0) = &simobject_->MemReqPort;
|
||||
mem_rsp_ports.at(0) = &simobject_->MemRspPort;
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i);
|
||||
mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
std::vector<SimPort<MemReq>*> cluster_mem_req_ports(cores_per_cluster);
|
||||
std::vector<SimPort<MemRsp>*> cluster_mem_rsp_ports(cores_per_cluster);
|
||||
|
||||
if (L2_ENABLE) {
|
||||
auto& l2cache = l2caches_.at(i);
|
||||
l2cache = Cache::Create("l2cache", Cache::Config{
|
||||
log2ceil(L2_CACHE_SIZE), // C
|
||||
if (L3_ENABLE) {
|
||||
l3cache_ = Cache::Create("l3cache", Cache::Config{
|
||||
log2ceil(L3_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
L2_NUM_BANKS, // number of banks
|
||||
L2_NUM_PORTS, // number of ports
|
||||
(uint8_t)cores_per_cluster, // request size
|
||||
32, // address bits
|
||||
L3_NUM_BANKS, // number of banks
|
||||
L3_NUM_PORTS, // number of ports
|
||||
NUM_CLUSTERS, // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L2_MSHR_SIZE, // mshr
|
||||
L3_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
});
|
||||
}
|
||||
);
|
||||
l3cache_->MemReqPort.bind(mem_req_ports.at(0));
|
||||
mem_rsp_ports.at(0)->bind(&l3cache_->MemRspPort);
|
||||
|
||||
mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort);
|
||||
l2cache->MemReqPort.bind(mem_req_ports.at(i));
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
cluster_mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j);
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i);
|
||||
mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i);
|
||||
}
|
||||
} else {
|
||||
auto& l2_mem_switch = l2_mem_switches_.at(i);
|
||||
l2_mem_switch = Switch<MemReq, MemRsp>::Create("l2_arb", ArbiterType::RoundRobin, cores_per_cluster);
|
||||
} else if (NUM_CLUSTERS > 1) {
|
||||
l3_mem_switch_ = Switch<MemReq, MemRsp>::Create("l3_arb", ArbiterType::RoundRobin, NUM_CLUSTERS);
|
||||
l3_mem_switch_->ReqOut.bind(mem_req_ports.at(0));
|
||||
mem_rsp_ports.at(0)->bind(&l3_mem_switch_->RspIn);
|
||||
|
||||
mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn);
|
||||
l2_mem_switch->ReqOut.bind(mem_req_ports.at(i));
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
cluster_mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j);
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i);
|
||||
mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
auto& core = cores_.at((i * cores_per_cluster) + j);
|
||||
cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort);
|
||||
core->MemReqPort.bind(cluster_mem_req_ports.at(j));
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
std::vector<SimPort<MemReq>*> cluster_mem_req_ports(cores_per_cluster);
|
||||
std::vector<SimPort<MemRsp>*> cluster_mem_rsp_ports(cores_per_cluster);
|
||||
|
||||
if (L2_ENABLE) {
|
||||
auto& l2cache = l2caches_.at(i);
|
||||
l2cache = Cache::Create("l2cache", Cache::Config{
|
||||
log2ceil(L2_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
L2_NUM_BANKS, // number of banks
|
||||
L2_NUM_PORTS, // number of ports
|
||||
(uint8_t)cores_per_cluster, // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L2_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
});
|
||||
l2cache->MemReqPort.bind(mem_req_ports.at(i));
|
||||
mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort);
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
cluster_mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j);
|
||||
}
|
||||
} else {
|
||||
auto& l2_mem_switch = l2_mem_switches_.at(i);
|
||||
l2_mem_switch = Switch<MemReq, MemRsp>::Create("l2_arb", ArbiterType::RoundRobin, cores_per_cluster);
|
||||
l2_mem_switch->ReqOut.bind(mem_req_ports.at(i));
|
||||
mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn);
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
cluster_mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
auto& core = cores_.at((i * cores_per_cluster) + j);
|
||||
core->MemReqPort.bind(cluster_mem_req_ports.at(j));
|
||||
cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Processor::attach_ram(RAM* ram) {
|
||||
for (auto core : cores_) {
|
||||
core->attach_ram(ram);
|
||||
~Impl() {}
|
||||
|
||||
void step(uint64_t cycle) {
|
||||
__unused (cycle);
|
||||
}
|
||||
}
|
||||
|
||||
Processor::~Processor() {}
|
||||
void attach_ram(RAM* ram) {
|
||||
for (auto core : cores_) {
|
||||
core->attach_ram(ram);
|
||||
}
|
||||
}
|
||||
|
||||
int Processor::run() {
|
||||
bool running;
|
||||
int exitcode = 0;
|
||||
do {
|
||||
SimPlatform::instance().step();
|
||||
|
||||
running = false;
|
||||
bool check_exit(int* exitcode) {
|
||||
bool running = false;
|
||||
for (auto& core : cores_) {
|
||||
if (core->running()) {
|
||||
running = true;
|
||||
}
|
||||
if (core->check_exit()) {
|
||||
exitcode = core->getIRegValue(3);
|
||||
running = false;
|
||||
break;
|
||||
*exitcode = core->getIRegValue(3);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} while (running);
|
||||
return !running;
|
||||
}
|
||||
};
|
||||
|
||||
std::cout << std::flush;
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
return exitcode;
|
||||
Processor::Processor(const SimContext& ctx, const ArchDef& arch)
|
||||
: SimObject<Processor>(ctx, "Vortex")
|
||||
, MemReqPort(this)
|
||||
, MemRspPort(this)
|
||||
, impl_(new Impl(this, arch))
|
||||
{}
|
||||
|
||||
Processor::~Processor() {
|
||||
delete impl_;
|
||||
}
|
||||
|
||||
void Processor::attach_ram(RAM* mem) {
|
||||
impl_->attach_ram(mem);
|
||||
}
|
||||
|
||||
bool Processor::check_exit(int* exitcode) {
|
||||
return impl_->check_exit(exitcode);
|
||||
}
|
||||
|
||||
void Processor::step(uint64_t cycle) {
|
||||
impl_->step(cycle);
|
||||
}
|
||||
@@ -4,24 +4,23 @@
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Processor {
|
||||
class Processor : public SimObject<Processor> {
|
||||
public:
|
||||
typedef std::shared_ptr<Processor> Ptr;
|
||||
SimPort<MemReq> MemReqPort;
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
|
||||
Processor(const ArchDef& arch);
|
||||
Processor(const SimContext& ctx, const ArchDef& arch);
|
||||
~Processor();
|
||||
|
||||
void attach_ram(RAM* mem);
|
||||
|
||||
int run();
|
||||
bool check_exit(int* exitcode);
|
||||
|
||||
void step(uint64_t cycle);
|
||||
|
||||
private:
|
||||
std::vector<Core::Ptr> cores_;
|
||||
std::vector<Cache::Ptr> l2caches_;
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> l2_mem_switches_;
|
||||
Cache::Ptr l3cache_;
|
||||
Switch<MemReq, MemRsp>::Ptr l3_mem_switch_;
|
||||
MemSim::Ptr memsim_;
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -65,8 +65,7 @@ public:
|
||||
|
||||
if (!core_req.write || config_.write_reponse) {
|
||||
// send response
|
||||
MemRsp core_rsp;
|
||||
core_rsp.tag = core_req.tag;
|
||||
MemRsp core_rsp{core_req.tag, core_req.core_id};
|
||||
this->Outputs.at(req_id).send(core_rsp, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ inline std::ostream &operator<<(std::ostream &os, const ExeType& type) {
|
||||
enum class AluType {
|
||||
ARITH,
|
||||
BRANCH,
|
||||
SYSCALL,
|
||||
IMUL,
|
||||
IDIV,
|
||||
CMOV,
|
||||
@@ -77,11 +78,12 @@ enum class AluType {
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const AluType& type) {
|
||||
switch (type) {
|
||||
case AluType::ARITH: os << "ARITH"; break;
|
||||
case AluType::BRANCH: os << "BRANCH"; break;
|
||||
case AluType::IMUL: os << "IMUL"; break;
|
||||
case AluType::IDIV: os << "IDIV"; break;
|
||||
case AluType::CMOV: os << "CMOV"; break;
|
||||
case AluType::ARITH: os << "ARITH"; break;
|
||||
case AluType::BRANCH: os << "BRANCH"; break;
|
||||
case AluType::SYSCALL: os << "SYSCALL"; break;
|
||||
case AluType::IMUL: os << "IMUL"; break;
|
||||
case AluType::IDIV: os << "IDIV"; break;
|
||||
case AluType::CMOV: os << "CMOV"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
@@ -207,24 +209,31 @@ inline std::ostream &operator<<(std::ostream &os, const ArbiterType& type) {
|
||||
|
||||
struct MemReq {
|
||||
uint64_t addr;
|
||||
uint32_t tag;
|
||||
bool write;
|
||||
bool is_io;
|
||||
bool non_cacheable;
|
||||
uint32_t tag;
|
||||
uint32_t core_id;
|
||||
|
||||
MemReq(uint64_t _addr = 0,
|
||||
bool _write = false,
|
||||
bool _non_cacheable = false,
|
||||
uint64_t _tag = 0,
|
||||
bool _write = false,
|
||||
bool _is_io = false
|
||||
uint32_t _core_id = 0
|
||||
) : addr(_addr)
|
||||
, tag(_tag)
|
||||
, write(_write)
|
||||
, is_io(_is_io)
|
||||
, non_cacheable(_non_cacheable)
|
||||
, tag(_tag)
|
||||
, core_id(_core_id)
|
||||
{}
|
||||
};
|
||||
|
||||
struct MemRsp {
|
||||
uint64_t tag;
|
||||
MemRsp(uint64_t _tag = 0) : tag (_tag) {}
|
||||
uint32_t core_id;
|
||||
MemRsp(uint64_t _tag = 0, uint32_t _core_id = 0)
|
||||
: tag (_tag)
|
||||
, core_id(_core_id)
|
||||
{}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Reference in New Issue
Block a user