#include #include #include #include #include "types.h" #include "util.h" #include "archdef.h" #include "mem.h" #include "decode.h" #include "core.h" #include "debug.h" using namespace vortex; Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id) : id_(id) , arch_(arch) , decoder_(decoder) , mem_(mem) , shared_mem_(1, SMEM_SIZE) , inst_in_schedule_("schedule") , inst_in_fetch_("fetch") , inst_in_decode_("decode") , inst_in_issue_("issue") , inst_in_execute_("execute") , inst_in_writeback_("writeback") { in_use_iregs_.resize(arch.num_warps(), 0); in_use_fregs_.resize(arch.num_warps(), 0); in_use_vregs_.reset(); csrs_.resize(arch_.num_csrs(), 0); fcsrs_.resize(arch_.num_warps(), 0); barriers_.resize(arch_.num_barriers(), 0); warps_.resize(arch_.num_warps()); for (int i = 0; i < arch_.num_warps(); ++i) { warps_[i] = std::make_shared(this, i); } this->clear(); } void Core::clear() { for (int w = 0; w < arch_.num_warps(); ++w) { in_use_iregs_[w].reset(); in_use_fregs_[w].reset(); } stalled_warps_.reset(); in_use_vregs_.reset(); for (auto& csr : csrs_) { csr = 0; } for (auto& fcsr : fcsrs_) { fcsr = 0; } for (auto& barrier : barriers_) { barrier.reset(); } for (auto warp : warps_) { warp->clear(); } inst_in_schedule_.clear(); inst_in_fetch_.clear(); inst_in_decode_.clear(); inst_in_issue_.clear(); inst_in_execute_.clear(); inst_in_writeback_.clear(); steps_ = 0; insts_ = 0; loads_ = 0; stores_ = 0; inst_in_schedule_.valid = true; warps_[0]->setTmask(0, true); } void Core::step() { D(3, "###########################################################"); steps_++; D(3, std::dec << "Core" << id_ << ": cycle: " << steps_); DPH(3, "stalled warps:"); for (int i = 0; i < arch_.num_warps(); i++) { DPN(3, " " << stalled_warps_[i]); } DPN(3, "\n"); this->writeback(); this->execute(); this->issue(); this->decode(); this->fetch(); this->schedule(); DPN(3, std::flush); } void Core::schedule() { if (!inst_in_schedule_.enter(&inst_in_fetch_)) return; bool foundSchedule = false; int scheduled_warp = inst_in_schedule_.wid; for (size_t wid = 0; wid < warps_.size(); ++wid) { // round robin scheduling scheduled_warp = (scheduled_warp + 1) % warps_.size(); bool is_active = warps_[scheduled_warp]->active(); bool stalled = stalled_warps_[scheduled_warp]; if (is_active && !stalled) { foundSchedule = true; break; } } if (!foundSchedule) return; D(3, "Schedule: wid=" << scheduled_warp); inst_in_schedule_.wid = scheduled_warp; // advance pipeline inst_in_schedule_.next(&inst_in_fetch_); } void Core::fetch() { if (!inst_in_fetch_.enter(&inst_in_issue_)) return; int wid = inst_in_fetch_.wid; auto active_threads_b = warps_[wid]->getActiveThreads(); warps_[wid]->step(&inst_in_fetch_); auto active_threads_a = warps_[wid]->getActiveThreads(); insts_ += active_threads_b; if (active_threads_b != active_threads_a) { D(3, "** warp #" << wid << " active threads changed from " << active_threads_b << " to " << active_threads_a); } if (inst_in_fetch_.stall_warp) { D(3, "** warp #" << wid << " stalled"); stalled_warps_[wid] = true; } D(4, inst_in_fetch_); // advance pipeline inst_in_fetch_.next(&inst_in_issue_); } void Core::decode() { if (!inst_in_decode_.enter(&inst_in_issue_)) return; // advance pipeline inst_in_decode_.next(&inst_in_issue_); } void Core::issue() { if (!inst_in_issue_.enter(&inst_in_execute_)) return; bool in_use_regs = (inst_in_issue_.used_iregs & in_use_iregs_[inst_in_issue_.wid]) != 0 || (inst_in_issue_.used_fregs & in_use_fregs_[inst_in_issue_.wid]) != 0 || (inst_in_issue_.used_vregs & in_use_vregs_) != 0; if (in_use_regs) { D(3, "Issue: registers not ready!"); inst_in_issue_.stalled = true; return; } switch (inst_in_issue_.rdest_type) { case 1: if (inst_in_issue_.rdest) in_use_iregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1; break; case 2: in_use_fregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1; break; case 3: in_use_vregs_[inst_in_issue_.rdest] = 1; break; default: break; } // advance pipeline inst_in_issue_.next(&inst_in_execute_); } void Core::execute() { if (!inst_in_execute_.enter(&inst_in_writeback_)) return; // advance pipeline inst_in_execute_.next(&inst_in_writeback_); } void Core::writeback() { if (!inst_in_writeback_.enter(NULL)) return; switch (inst_in_writeback_.rdest_type) { case 1: in_use_iregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0; break; case 2: in_use_fregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0; break; case 3: in_use_vregs_[inst_in_writeback_.rdest] = 0; break; default: break; } if (inst_in_writeback_.stall_warp) { stalled_warps_[inst_in_writeback_.wid] = 0; } // advance pipeline inst_in_writeback_.next(NULL); } Word Core::get_csr(Addr addr, int tid, int wid) { if (addr == CSR_FFLAGS) { return fcsrs_.at(wid) & 0x1F; } else if (addr == CSR_FRM) { return (fcsrs_.at(wid) >> 5); } else if (addr == CSR_FCSR) { return fcsrs_.at(wid); } else if (addr == CSR_WTID) { // Warp threadID return tid; } else if (addr == CSR_LTID) { // Core threadID return tid + (wid * arch_.num_threads()); } else if (addr == CSR_GTID) { // Processor threadID return tid + (wid * arch_.num_threads()) + (arch_.num_threads() * arch_.num_warps() * id_); } else if (addr == CSR_LWID) { // Core warpID return wid; } else if (addr == CSR_GWID) { // Processor warpID return wid + (arch_.num_warps() * id_); } else if (addr == CSR_GCID) { // Processor coreID return id_; } else if (addr == CSR_NT) { // Number of threads per warp return arch_.num_threads(); } else if (addr == CSR_NW) { // Number of warps per core return arch_.num_warps(); } else if (addr == CSR_NC) { // Number of cores return arch_.num_cores(); } else if (addr == CSR_INSTRET) { // NumInsts return insts_; } else if (addr == CSR_INSTRET_H) { // NumInsts return (Word)(insts_ >> 32); } else if (addr == CSR_CYCLE) { // NumCycles return (Word)steps_; } else if (addr == CSR_CYCLE_H) { // NumCycles return (Word)(steps_ >> 32); } else { return csrs_.at(addr); } } void Core::set_csr(Addr addr, Word value, int /*tid*/, int wid) { if (addr == CSR_FFLAGS) { fcsrs_.at(wid) = (fcsrs_.at(wid) & ~0x1F) | (value & 0x1F); } else if (addr == CSR_FRM) { fcsrs_.at(wid) = (fcsrs_.at(wid) & ~0xE0) | (value << 5); } else if (addr == CSR_FCSR) { fcsrs_.at(wid) = value & 0xff; } else { csrs_.at(addr) = value; } } void Core::barrier(int bar_id, int count, int warp_id) { auto& barrier = barriers_.at(bar_id); barrier.set(warp_id); if (barrier.count() < (size_t)count) return; for (int i = 0; i < arch_.num_warps(); ++i) { if (barrier.test(i)) { warps_.at(i)->activate(); } } barrier.reset(); } Word Core::icache_fetch(Addr addr) { Word data; mem_.read(addr, &data, sizeof(Word), 0); return data; } Word Core::dcache_read(Addr addr, Size size) { ++loads_; Word data = 0; #ifdef SM_ENABLE if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE)) && ((addr + 3) < SMEM_BASE_ADDR)) { shared_mem_.read(addr & (SMEM_SIZE-1), &data, size); return data; } #endif mem_.read(addr, &data, size, 0); return data; } void Core::dcache_write(Addr addr, Word data, Size size) { ++stores_; #ifdef SM_ENABLE if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE)) && ((addr + 3) < SMEM_BASE_ADDR)) { shared_mem_.write(addr & (SMEM_SIZE-1), &data, size); return; } #endif mem_.write(addr, &data, size, 0); } bool Core::running() const { return inst_in_fetch_.valid || inst_in_decode_.valid || inst_in_issue_.valid || inst_in_execute_.valid || inst_in_writeback_.valid; } void Core::printStats() const { std::cout << "Steps : " << steps_ << std::endl << "Insts : " << insts_ << std::endl << "Loads : " << loads_ << std::endl << "Stores: " << stores_ << std::endl; }