SimX timing simulation
This commit is contained in:
@@ -12,34 +12,92 @@
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
||||
: id_(id)
|
||||
Core::Core(const SimContext& ctx, const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
||||
: SimObject(ctx, "Core")
|
||||
, id_(id)
|
||||
, arch_(arch)
|
||||
, decoder_(decoder)
|
||||
, mem_(mem)
|
||||
, shared_mem_(1, SMEM_SIZE)
|
||||
, inst_in_schedule_("schedule")
|
||||
, inst_in_fetch_("fetch")
|
||||
, inst_in_decode_("decode")
|
||||
, inst_in_issue_("issue")
|
||||
, inst_in_execute_("execute")
|
||||
, inst_in_writeback_("writeback") {
|
||||
in_use_iregs_.resize(arch.num_warps(), 0);
|
||||
in_use_fregs_.resize(arch.num_warps(), 0);
|
||||
in_use_vregs_.reset();
|
||||
|
||||
csrs_.resize(arch_.num_csrs(), 0);
|
||||
|
||||
fcsrs_.resize(arch_.num_warps(), 0);
|
||||
|
||||
barriers_.resize(arch_.num_barriers(), 0);
|
||||
|
||||
warps_.resize(arch_.num_warps());
|
||||
, warps_(arch.num_warps())
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
, csrs_(arch.num_csrs(), 0)
|
||||
, fcsrs_(arch.num_warps(), 0)
|
||||
, ibuffers_(arch.num_warps(), IBUF_SIZE)
|
||||
, scoreboard_(arch_)
|
||||
, exe_units_((int)ExeType::MAX)
|
||||
, icache_(Cache::Create("Icache", CacheConfig{
|
||||
log2ceil(ICACHE_SIZE), // C
|
||||
log2ceil(L1_BLOCK_SIZE),// B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
1, // number of banks
|
||||
1, // number of ports
|
||||
1, // request size
|
||||
true, // write-throught
|
||||
0, // victim size
|
||||
NUM_WARPS, // mshr
|
||||
2, // pipeline latency
|
||||
}))
|
||||
, dcache_(Cache::Create("Dcache", CacheConfig{
|
||||
log2ceil(DCACHE_SIZE), // C
|
||||
log2ceil(L1_BLOCK_SIZE),// B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
DCACHE_NUM_BANKS, // number of banks
|
||||
DCACHE_NUM_PORTS, // number of ports
|
||||
(uint8_t)arch.num_threads(), // request size
|
||||
true, // write-throught
|
||||
0, // victim size
|
||||
DCACHE_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
}))
|
||||
, l1_mem_switch_(Switch<MemReq, MemRsp>::Create("l1_arb", ArbiterType::Priority, 2))
|
||||
, icache_rsp_port_(this, this, &Core::icache_handleCacheReponse)
|
||||
, dcache_rsp_port_(arch.num_threads(), {this, reinterpret_cast<LsuUnit*>(exe_units_.at((int)ExeType::LSU).get()) , &LsuUnit::handleCacheReponse})
|
||||
, fetch_stage_("fetch")
|
||||
, decode_stage_("decode")
|
||||
, issue_stage_("issue")
|
||||
, execute_stage_("execute")
|
||||
, commit_stage_("writeback")
|
||||
, pending_icache_(arch_.num_warps())
|
||||
, stalled_warps_(0)
|
||||
, last_schedule_wid_(0)
|
||||
, pending_instrs_(0)
|
||||
, ebreak_(false)
|
||||
, stats_insts_(0)
|
||||
, stats_loads_(0)
|
||||
, stats_stores_(0)
|
||||
, MemRspPort(this, &l1_mem_switch_->RspIn)
|
||||
, MemReqPort(this, &l1_mem_switch_->ReqOut)
|
||||
{
|
||||
for (int i = 0; i < arch_.num_warps(); ++i) {
|
||||
warps_[i] = std::make_shared<Warp>(this, i);
|
||||
warps_.at(i) = std::make_shared<Warp>(this, i);
|
||||
}
|
||||
|
||||
this->clear();
|
||||
// register execute units
|
||||
exe_units_.at((int)ExeType::ALU) = std::make_shared<AluUnit>(this);
|
||||
exe_units_.at((int)ExeType::LSU) = std::make_shared<LsuUnit>(this);
|
||||
exe_units_.at((int)ExeType::CSR) = std::make_shared<CsrUnit>(this);
|
||||
exe_units_.at((int)ExeType::FPU) = std::make_shared<FpuUnit>(this);
|
||||
exe_units_.at((int)ExeType::GPU) = std::make_shared<GpuUnit>(this);
|
||||
|
||||
// connect l1 caches
|
||||
icache_->CoreRspPorts.at(0).bind(&icache_rsp_port_);
|
||||
for (int i = 0; i < arch_.num_threads(); ++i) {
|
||||
dcache_->CoreRspPorts.at(i).bind(&dcache_rsp_port_.at(i));
|
||||
}
|
||||
|
||||
// connect l1 switch
|
||||
icache_->MemReqPort.bind(&l1_mem_switch_->ReqIn[0]);
|
||||
dcache_->MemReqPort.bind(&l1_mem_switch_->ReqIn[1]);
|
||||
l1_mem_switch_->RspOut[0].bind(&icache_->MemRspPort);
|
||||
l1_mem_switch_->RspOut[1].bind(&dcache_->MemRspPort);
|
||||
|
||||
// activate warp0
|
||||
warps_.at(0)->setTmask(0, true);
|
||||
}
|
||||
|
||||
Core::~Core() {
|
||||
@@ -51,194 +109,164 @@ Core::~Core() {
|
||||
}
|
||||
}
|
||||
|
||||
void Core::clear() {
|
||||
for (int w = 0; w < arch_.num_warps(); ++w) {
|
||||
in_use_iregs_[w].reset();
|
||||
in_use_fregs_[w].reset();
|
||||
}
|
||||
stalled_warps_.reset();
|
||||
|
||||
in_use_vregs_.reset();
|
||||
|
||||
for (auto& csr : csrs_) {
|
||||
csr = 0;
|
||||
}
|
||||
|
||||
for (auto& fcsr : fcsrs_) {
|
||||
fcsr = 0;
|
||||
}
|
||||
|
||||
for (auto& barrier : barriers_) {
|
||||
barrier.reset();
|
||||
}
|
||||
|
||||
for (auto warp : warps_) {
|
||||
warp->clear();
|
||||
}
|
||||
|
||||
inst_in_schedule_.clear();
|
||||
inst_in_fetch_.clear();
|
||||
inst_in_decode_.clear();
|
||||
inst_in_issue_.clear();
|
||||
inst_in_execute_.clear();
|
||||
inst_in_writeback_.clear();
|
||||
print_bufs_.clear();
|
||||
|
||||
steps_ = 0;
|
||||
insts_ = 0;
|
||||
loads_ = 0;
|
||||
stores_ = 0;
|
||||
|
||||
inst_in_schedule_.valid = true;
|
||||
warps_[0]->setTmask(0, true);
|
||||
|
||||
ebreak_ = false;
|
||||
void Core::icache_handleCacheReponse(const MemRsp& response, uint32_t /*port_id*/) {
|
||||
// advance to decode stage
|
||||
uint32_t wid = response.tag;
|
||||
pipeline_state_t state;
|
||||
pending_icache_.remove(wid, &state);
|
||||
auto latency = (SimPlatform::instance().cycles() - state.icache_latency);
|
||||
state.icache_latency = latency;
|
||||
decode_stage_.push(state);
|
||||
}
|
||||
|
||||
void Core::step() {
|
||||
void Core::step(uint64_t cycle) {
|
||||
__unused (cycle);
|
||||
D(2, "###########################################################");
|
||||
D(2, std::dec << "Core" << id_ << ": cycle: " << cycle);
|
||||
|
||||
steps_++;
|
||||
D(2, std::dec << "Core" << id_ << ": cycle: " << steps_);
|
||||
|
||||
this->writeback();
|
||||
this->commit();
|
||||
this->execute();
|
||||
this->issue();
|
||||
this->decode();
|
||||
this->fetch();
|
||||
this->schedule();
|
||||
|
||||
DPN(2, std::flush);
|
||||
}
|
||||
|
||||
void Core::schedule() {
|
||||
if (!inst_in_schedule_.enter(&inst_in_fetch_))
|
||||
return;
|
||||
|
||||
void Core::warp_scheduler() {
|
||||
bool foundSchedule = false;
|
||||
int scheduled_warp = inst_in_schedule_.wid;
|
||||
int scheduled_warp = last_schedule_wid_;
|
||||
|
||||
for (size_t wid = 0; wid < warps_.size(); ++wid) {
|
||||
// round robin scheduling
|
||||
// round robin scheduling
|
||||
for (size_t wid = 0; wid < warps_.size(); ++wid) {
|
||||
scheduled_warp = (scheduled_warp + 1) % warps_.size();
|
||||
bool is_active = warps_[scheduled_warp]->active();
|
||||
bool stalled = stalled_warps_[scheduled_warp];
|
||||
if (is_active && !stalled) {
|
||||
bool warp_active = warps_.at(scheduled_warp)->active();
|
||||
bool warp_stalled = stalled_warps_.test(scheduled_warp);
|
||||
if (warp_active && !warp_stalled) {
|
||||
last_schedule_wid_ = scheduled_warp;
|
||||
foundSchedule = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundSchedule)
|
||||
return;
|
||||
return;
|
||||
|
||||
D(2, "Schedule: wid=" << scheduled_warp);
|
||||
inst_in_schedule_.wid = scheduled_warp;
|
||||
// suspend warp until decode
|
||||
stalled_warps_.set(scheduled_warp);
|
||||
|
||||
// advance pipeline
|
||||
inst_in_schedule_.next(&inst_in_fetch_);
|
||||
auto& warp = warps_.at(scheduled_warp);
|
||||
stats_insts_ += warp->getActiveThreads();
|
||||
|
||||
pipeline_state_t state;
|
||||
warp->eval(&state);
|
||||
|
||||
D(4, state);
|
||||
|
||||
// advance to fetch stage
|
||||
++pending_instrs_;
|
||||
fetch_stage_.push(state);
|
||||
}
|
||||
|
||||
void Core::fetch() {
|
||||
if (!inst_in_fetch_.enter(&inst_in_issue_))
|
||||
return;
|
||||
// schedule icache request
|
||||
pipeline_state_t state;
|
||||
if (fetch_stage_.try_pop(&state)) {
|
||||
state.icache_latency = SimPlatform::instance().cycles();
|
||||
MemReq mem_req;
|
||||
mem_req.addr = state.PC;
|
||||
mem_req.write = false;
|
||||
mem_req.tag = pending_icache_.allocate(state);
|
||||
icache_->CoreReqPorts.at(0).send(mem_req, 1);
|
||||
}
|
||||
|
||||
int wid = inst_in_fetch_.wid;
|
||||
|
||||
auto active_threads_b = warps_[wid]->getActiveThreads();
|
||||
warps_[wid]->step(&inst_in_fetch_);
|
||||
auto active_threads_a = warps_[wid]->getActiveThreads();
|
||||
|
||||
insts_ += active_threads_b;
|
||||
if (active_threads_b != active_threads_a) {
|
||||
D(3, "*** warp#" << wid << " active threads changed to " << active_threads_a);
|
||||
}
|
||||
|
||||
if (inst_in_fetch_.stall_warp) {
|
||||
D(3, "*** warp#" << wid << " fetch stalled");
|
||||
stalled_warps_[wid] = true;
|
||||
}
|
||||
|
||||
D(4, inst_in_fetch_);
|
||||
|
||||
// advance pipeline
|
||||
inst_in_fetch_.next(&inst_in_issue_);
|
||||
// schedule next warp
|
||||
this->warp_scheduler();
|
||||
}
|
||||
|
||||
void Core::decode() {
|
||||
if (!inst_in_decode_.enter(&inst_in_issue_))
|
||||
return;
|
||||
pipeline_state_t state;
|
||||
if (!decode_stage_.try_pop(&state))
|
||||
return;
|
||||
|
||||
// advance pipeline
|
||||
inst_in_decode_.next(&inst_in_issue_);
|
||||
if (state.stall_warp) {
|
||||
D(3, "*** warp#" << state.wid << " fetch stalled");
|
||||
} else {
|
||||
// release warp
|
||||
stalled_warps_.reset(state.wid);
|
||||
}
|
||||
|
||||
// advance to issue stage
|
||||
issue_stage_.push(state);
|
||||
}
|
||||
|
||||
void Core::issue() {
|
||||
if (!inst_in_issue_.enter(&inst_in_execute_))
|
||||
return;
|
||||
if (!issue_stage_.empty()) {
|
||||
// insert to ibuffer
|
||||
auto& state = issue_stage_.top();
|
||||
auto& ibuffer = ibuffers_.at(state.wid);
|
||||
if (!ibuffer.full()) {
|
||||
ibuffer.push(state);
|
||||
issue_stage_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
// issue ibuffer instructions
|
||||
for (auto& ibuffer : ibuffers_) {
|
||||
if (ibuffer.empty())
|
||||
continue;
|
||||
|
||||
bool in_use_regs = (inst_in_issue_.used_iregs & in_use_iregs_[inst_in_issue_.wid]) != 0
|
||||
|| (inst_in_issue_.used_fregs & in_use_fregs_[inst_in_issue_.wid]) != 0
|
||||
|| (inst_in_issue_.used_vregs & in_use_vregs_) != 0;
|
||||
auto& state = ibuffer.top();
|
||||
|
||||
// check scoreboard
|
||||
if (scoreboard_.in_use(state))
|
||||
continue;
|
||||
|
||||
// update scoreboard
|
||||
scoreboard_.reserve(state);
|
||||
|
||||
// advance to execute stage
|
||||
execute_stage_.push(state);
|
||||
|
||||
ibuffer.pop();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Core::execute() {
|
||||
// process stage inputs
|
||||
if (!execute_stage_.empty()) {
|
||||
auto& state = execute_stage_.top();
|
||||
auto& exe_unit = exe_units_.at((int)state.exe_type);
|
||||
exe_unit->push_input(state);
|
||||
execute_stage_.pop();
|
||||
}
|
||||
|
||||
// advance execute units
|
||||
for (auto& exe_unit : exe_units_) {
|
||||
exe_unit->step();
|
||||
}
|
||||
|
||||
if (in_use_regs) {
|
||||
D(3, "*** Issue: registers not ready!");
|
||||
inst_in_issue_.stalled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (inst_in_issue_.rdest_type) {
|
||||
case 1:
|
||||
if (inst_in_issue_.rdest)
|
||||
in_use_iregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1;
|
||||
break;
|
||||
case 2:
|
||||
in_use_fregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1;
|
||||
break;
|
||||
case 3:
|
||||
in_use_vregs_[inst_in_issue_.rdest] = 1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
// commit completed instructions
|
||||
for (auto& exe_unit : exe_units_) {
|
||||
pipeline_state_t state;
|
||||
if (exe_unit->pop_output(&state)) {
|
||||
if (state.stall_warp) {
|
||||
stalled_warps_.reset(state.wid);
|
||||
}
|
||||
// advance to commit stage
|
||||
commit_stage_.push(state);
|
||||
}
|
||||
}
|
||||
|
||||
// advance pipeline
|
||||
inst_in_issue_.next(&inst_in_execute_);
|
||||
}
|
||||
|
||||
void Core::execute() {
|
||||
if (!inst_in_execute_.enter(&inst_in_writeback_))
|
||||
void Core::commit() {
|
||||
pipeline_state_t state;
|
||||
if (!commit_stage_.try_pop(&state))
|
||||
return;
|
||||
|
||||
// advance pipeline
|
||||
inst_in_execute_.next(&inst_in_writeback_);
|
||||
}
|
||||
|
||||
void Core::writeback() {
|
||||
if (!inst_in_writeback_.enter(NULL))
|
||||
return;
|
||||
|
||||
switch (inst_in_writeback_.rdest_type) {
|
||||
case 1:
|
||||
in_use_iregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
|
||||
break;
|
||||
case 2:
|
||||
in_use_fregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
|
||||
break;
|
||||
case 3:
|
||||
in_use_vregs_[inst_in_writeback_.rdest] = 0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst_in_writeback_.stall_warp) {
|
||||
stalled_warps_[inst_in_writeback_.wid] = false;
|
||||
D(3, "*** warp#" << inst_in_writeback_.wid << " fetch released");
|
||||
}
|
||||
|
||||
// advance pipeline
|
||||
inst_in_writeback_.next(NULL);
|
||||
// update scoreboard
|
||||
scoreboard_.release(state);
|
||||
}
|
||||
|
||||
Word Core::get_csr(Addr addr, int tid, int wid) {
|
||||
@@ -281,16 +309,16 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
|
||||
return arch_.num_cores();
|
||||
} else if (addr == CSR_MINSTRET) {
|
||||
// NumInsts
|
||||
return insts_;
|
||||
return stats_insts_;
|
||||
} else if (addr == CSR_MINSTRET_H) {
|
||||
// NumInsts
|
||||
return (Word)(insts_ >> 32);
|
||||
return (Word)(stats_insts_ >> 32);
|
||||
} else if (addr == CSR_MCYCLE) {
|
||||
// NumCycles
|
||||
return (Word)steps_;
|
||||
return (Word)SimPlatform::instance().cycles();
|
||||
} else if (addr == CSR_MCYCLE_H) {
|
||||
// NumCycles
|
||||
return (Word)(steps_ >> 32);
|
||||
return (Word)(SimPlatform::instance().cycles() >> 32);
|
||||
} else {
|
||||
return csrs_.at(addr);
|
||||
}
|
||||
@@ -328,7 +356,7 @@ Word Core::icache_fetch(Addr addr) {
|
||||
}
|
||||
|
||||
Word Core::dcache_read(Addr addr, Size size) {
|
||||
++loads_;
|
||||
++stats_loads_;
|
||||
Word data = 0;
|
||||
#ifdef SM_ENABLE
|
||||
if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
|
||||
@@ -342,7 +370,7 @@ Word Core::dcache_read(Addr addr, Size size) {
|
||||
}
|
||||
|
||||
void Core::dcache_write(Addr addr, Word data, Size size) {
|
||||
++stores_;
|
||||
++stats_stores_;
|
||||
#ifdef SM_ENABLE
|
||||
if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
|
||||
&& ((addr + 3) < SMEM_BASE_ADDR)) {
|
||||
@@ -359,23 +387,19 @@ void Core::dcache_write(Addr addr, Word data, Size size) {
|
||||
}
|
||||
|
||||
bool Core::running() const {
|
||||
return inst_in_fetch_.valid
|
||||
|| inst_in_decode_.valid
|
||||
|| inst_in_issue_.valid
|
||||
|| inst_in_execute_.valid
|
||||
|| inst_in_writeback_.valid;
|
||||
return pending_instrs_;
|
||||
}
|
||||
|
||||
void Core::printStats() const {
|
||||
std::cout << "Steps : " << steps_ << std::endl
|
||||
<< "Insts : " << insts_ << std::endl
|
||||
<< "Loads : " << loads_ << std::endl
|
||||
<< "Stores: " << stores_ << std::endl;
|
||||
std::cout << "Cycles: " << SimPlatform::instance().cycles() << std::endl
|
||||
<< "Insts : " << stats_insts_ << std::endl
|
||||
<< "Loads : " << stats_loads_ << std::endl
|
||||
<< "Stores: " << stats_stores_ << std::endl;
|
||||
}
|
||||
|
||||
void Core::writeToStdOut(Addr addr, Word data) {
|
||||
uint32_t tid = (addr - IO_COUT_ADDR) & (IO_COUT_SIZE-1);
|
||||
auto& ss_buf = print_bufs_[tid];
|
||||
auto& ss_buf = print_bufs_.at(tid);
|
||||
char c = (char)data;
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
|
||||
Reference in New Issue
Block a user