simX refactoring

This commit is contained in:
Blaise Tine
2021-02-27 02:27:19 -08:00
parent 4fe345f269
commit a8452483fe
20 changed files with 1198 additions and 1789 deletions

View File

@@ -185,7 +185,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
*value = IMPLEMENTATION_ID; *value = IMPLEMENTATION_ID;
break; break;
case VX_CAPS_MAX_CORES: case VX_CAPS_MAX_CORES:
*value = NUM_CORES; *value = NUM_CORES * NUM_CLUSTERS;
break; break;
case VX_CAPS_MAX_WARPS: case VX_CAPS_MAX_WARPS:
*value = NUM_WARPS; *value = NUM_WARPS;

View File

@@ -12,8 +12,8 @@ CXXFLAGS += -DDUMP_PERF_STATS
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 #CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1 CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
CXXFLAGS += $(CONFIGS) CXXFLAGS += $(CONFIGS)
@@ -21,7 +21,7 @@ LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread #LDFLAGS += -dynamiclib -pthread
SRCS = vortex.cpp ../common/vx_utils.cpp SRCS = vortex.cpp ../common/vx_utils.cpp
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/instr.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
# Debugigng # Debugigng
ifdef DEBUG ifdef DEBUG

View File

@@ -144,19 +144,18 @@ private:
void run() { void run() {
vortex::ArchDef arch("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS); vortex::ArchDef arch("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS);
vortex::Decoder decoder(arch); vortex::Decoder decoder(arch);
vortex::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true); vortex::MemoryUnit mu(PAGE_SIZE, arch.wsize(), true);
mu.attach(ram_, 0); mu.attach(ram_, 0);
std::vector<std::shared_ptr<vortex::Core>> cores(NUM_CORES); std::vector<std::shared_ptr<vortex::Core>> cores(arch.num_cores());
for (size_t i = 0; i < NUM_CORES; ++i) { for (int i = 0; i < arch.num_cores(); ++i) {
cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu); cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu, i);
} }
bool running; bool running;
do { do {
running = false; running = false;
for (size_t i = 0; i < NUM_CORES; ++i) { for (int i = 0; i < arch.num_cores(); ++i) {
if (!cores[i]->running()) if (!cores[i]->running())
continue; continue;
running = true; running = true;
@@ -236,7 +235,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
*value = IMPLEMENTATION_ID; *value = IMPLEMENTATION_ID;
break; break;
case VX_CAPS_MAX_CORES: case VX_CAPS_MAX_CORES:
*value = NUM_CORES; *value = NUM_CORES * NUM_CLUSTERS;
break; break;
case VX_CAPS_MAX_WARPS: case VX_CAPS_MAX_WARPS:
*value = NUM_WARPS; *value = NUM_WARPS;

View File

@@ -13,7 +13,7 @@ RTL_DIR = ../hw/rtl
PROJECT = simX PROJECT = simX
SRCS = util.cpp args.cpp mem.cpp core.cpp warp.cpp instr.cpp decode.cpp execute.cpp main.cpp SRCS = util.cpp args.cpp mem.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp
# Debugigng # Debugigng
ifdef DEBUG ifdef DEBUG

View File

@@ -11,156 +11,56 @@ namespace vortex {
class ArchDef { class ArchDef {
public: public:
struct Undefined {}; ArchDef(const std::string &/*arch*/,
ArchDef(const std::string &s,
int num_cores, int num_cores,
int num_warps, int num_warps,
int num_threads) { int num_threads) {
std::istringstream iss(s.c_str()); wsize_ = 4;
wordSize_ = 4; vsize_ = 16;
encChar_ = 'w'; num_regs_ = 32;
numRegs_ = 32; num_csrs_ = 4096;
numPRegs_ = 0; num_cores_ = num_cores;
numCores_ = num_cores; num_warps_ = num_warps;
numWarps_ = num_warps; num_threads_ = num_threads;
numThreads_ = num_threads;
extent_ = EXT_END;
} }
operator std::string () const { int wsize() const {
if (extent_ == EXT_NULL) return wsize_;
return "";
std::ostringstream oss;
if (extent_ >= EXT_WORDSIZE) oss << wordSize_;
if (extent_ >= EXT_ENC ) oss << encChar_;
if (extent_ >= EXT_REGS ) oss << numRegs_;
if (extent_ >= EXT_PREGS ) oss << '/' << numPRegs_;
if (extent_ >= EXT_THREADS ) oss << '/' << numThreads_;
if (extent_ >= EXT_WARPS ) oss << '/' << numWarps_;
if (extent_ >= EXT_CORES ) oss << '/' << numCores_;
return oss.str();
} }
bool operator==(const ArchDef &r) const { int vsize() const {
Extent minExtent(r.extent_ > extent_ ? extent_ : r.extent_); return vsize_;
// Can't be equal if we can't specify a binary encoding at all.
if (minExtent < EXT_PREGS)
return false;
if (minExtent >= EXT_WORDSIZE) {
if (wordSize_!=r.wordSize_)
return false;
}
if (minExtent >= EXT_ENC) {
if (encChar_ != r.encChar_)
return false;
}
if (minExtent >= EXT_REGS) {
if (numRegs_ != r.numRegs_)
return false;
}
if (minExtent >= EXT_PREGS) {
if (numPRegs_ != r.numPRegs_)
return false;
}
if (minExtent >= EXT_THREADS) {
if (numThreads_ != r.numThreads_)
return false;
}
if (minExtent >= EXT_WARPS) {
if (numWarps_ != r.numWarps_)
return false;
}
if (minExtent >= EXT_CORES) {
if (numCores_ != r.numCores_)
return false;
}
return true;
} }
bool operator!=(const ArchDef &r) const { int num_regs() const {
return !(*this == r); return num_regs_;
} }
Size getWordSize() const { int num_csrs() const {
if (extent_ < EXT_WORDSIZE) return num_csrs_;
throw Undefined();
return wordSize_;
} }
char getEncChar() const { int num_threads() const {
if ((extent_ < EXT_ENC) || (encChar_ == 'x')) return num_threads_;
throw Undefined();
return encChar_;
} }
RegNum getNumRegs() const { int num_warps() const {
if (extent_ < EXT_REGS) return num_warps_;
throw Undefined();
return numRegs_;
} }
RegNum getNumPRegs() const { int num_cores() const {
if (extent_ < EXT_PREGS) return num_cores_;
throw Undefined();
return numPRegs_;
}
ThdNum getNumThreads() const {
if (extent_ < EXT_THREADS)
throw Undefined();
return numThreads_;
}
ThdNum getNumWarps() const {
if (extent_ < EXT_WARPS)
throw Undefined();
return numWarps_;
}
ThdNum getNumCores() const {
if (extent_ < EXT_CORES)
throw Undefined();
return numCores_;
}
bool is_cpu_mode() const {
return cpu_mode_;
} }
private: private:
enum Extent {
EXT_NULL,
EXT_WORDSIZE,
EXT_ENC,
EXT_REGS,
EXT_PREGS,
EXT_THREADS,
EXT_WARPS,
EXT_CORES,
EXT_END
};
Extent extent_; int wsize_;
Size wordSize_; int vsize_;
ThdNum numThreads_; int num_regs_;
ThdNum numWarps_; int num_csrs_;
ThdNum numCores_; int num_threads_;
RegNum numRegs_; int num_warps_;
ThdNum numPRegs_; int num_cores_;
char encChar_;
bool cpu_mode_;
}; };
} }

View File

@@ -1,10 +1,7 @@
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include <string.h> #include <string.h>
#include <assert.h>
// #define USE_DEBUG 7
// #define PRINT_ACTIVE_THREADS
#include "types.h" #include "types.h"
#include "util.h" #include "util.h"
#include "archdef.h" #include "archdef.h"
@@ -14,21 +11,25 @@
#include "debug.h" #include "debug.h"
#define INIT_TRACE(trace_inst) \ #define INIT_TRACE(trace_inst) \
trace_inst.valid_inst = false; \ trace_inst.valid = false; \
trace_inst.pc = 0; \ trace_inst.PC = 0; \
trace_inst.wid = schedule_w_; \ trace_inst.wid = schedule_w_; \
trace_inst.rs1 = -1; \ trace_inst.irs1 = -1; \
trace_inst.rs2 = -1; \ trace_inst.irs2 = -1; \
trace_inst.rd = -1; \ trace_inst.frs1 = -1; \
trace_inst.vs1 = -1; \ trace_inst.frs2 = -1; \
trace_inst.vs2 = -1; \ trace_inst.frs3 = -1; \
trace_inst.vd = -1; \ trace_inst.frd = -1; \
trace_inst.ird = -1; \
trace_inst.vrs1 = -1; \
trace_inst.vrs2 = -1; \
trace_inst.vrd = -1; \
trace_inst.is_lw = false; \ trace_inst.is_lw = false; \
trace_inst.is_sw = false; \ trace_inst.is_sw = false; \
if (trace_inst.mem_addresses != NULL) \ if (trace_inst.mem_addresses != NULL) \
free(trace_inst.mem_addresses); \ free(trace_inst.mem_addresses); \
trace_inst.mem_addresses = (unsigned *)malloc(32 * sizeof(unsigned)); \ trace_inst.mem_addresses = (unsigned *)malloc(32 * sizeof(unsigned)); \
for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++) \ for (int tid = 0; tid < arch_.num_threads(); tid++) \
trace_inst.mem_addresses[tid] = 0xdeadbeef; \ trace_inst.mem_addresses[tid] = 0xdeadbeef; \
trace_inst.mem_stall_cycles = 0; \ trace_inst.mem_stall_cycles = 0; \
trace_inst.fetch_stall_cycles = 0; \ trace_inst.fetch_stall_cycles = 0; \
@@ -37,18 +38,22 @@
trace_inst.stalled = false; trace_inst.stalled = false;
#define CPY_TRACE(drain, source) \ #define CPY_TRACE(drain, source) \
drain.valid_inst = source.valid_inst; \ drain.valid = source.valid; \
drain.pc = source.pc; \ drain.PC = source.PC; \
drain.wid = source.wid; \ drain.wid = source.wid; \
drain.rs1 = source.rs1; \ drain.irs1 = source.irs1; \
drain.rs2 = source.rs2; \ drain.irs2 = source.irs2; \
drain.rd = source.rd; \ drain.ird = source.ird; \
drain.vs1 = source.vs1; \ drain.frs1 = source.frs1; \
drain.vs2 = source.vs2; \ drain.frs2 = source.frs2; \
drain.vd = source.vd; \ drain.frs3 = source.frs3; \
drain.frd = source.frd; \
drain.vrs1 = source.vrs1; \
drain.vrs2 = source.vrs2; \
drain.vrd = source.vrd; \
drain.is_lw = source.is_lw; \ drain.is_lw = source.is_lw; \
drain.is_sw = source.is_sw; \ drain.is_sw = source.is_sw; \
for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++)\ for (int tid = 0; tid < arch_.num_threads(); tid++) \
drain.mem_addresses[tid] = source.mem_addresses[tid]; \ drain.mem_addresses[tid] = source.mem_addresses[tid]; \
drain.mem_stall_cycles = source.mem_stall_cycles; \ drain.mem_stall_cycles = source.mem_stall_cycles; \
drain.fetch_stall_cycles = source.fetch_stall_cycles; \ drain.fetch_stall_cycles = source.fetch_stall_cycles; \
@@ -60,17 +65,17 @@ using namespace vortex;
void printTrace(trace_inst_t *trace, const char *stage_name) { void printTrace(trace_inst_t *trace, const char *stage_name) {
__unused(trace, stage_name); __unused(trace, stage_name);
D(3, stage_name << ": valid=" << trace->valid_inst); D(4, stage_name << ": valid=" << trace->valid);
D(3, stage_name << ": PC=" << std::hex << trace->pc << std::dec); D(4, stage_name << ": PC=" << std::hex << trace->PC << std::dec);
D(3, stage_name << ": wid=" << trace->wid); D(4, stage_name << ": wid=" << trace->wid);
D(3, stage_name << ": rd=" << trace->rd << ", rs1=" << trace->rs1 << ", trs2=" << trace->rs2); D(4, stage_name << ": rd=" << trace->ird << ", rs1=" << trace->irs1 << ", trs2=" << trace->irs2);
D(3, stage_name << ": is_lw=" << trace->is_lw); D(4, stage_name << ": is_lw=" << trace->is_lw);
D(3, stage_name << ": is_sw=" << trace->is_sw); D(4, stage_name << ": is_sw=" << trace->is_sw);
D(3, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles); D(4, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles);
D(3, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles); D(4, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles);
D(3, stage_name << ": stall_warp=" << trace->stall_warp); D(4, stage_name << ": stall_warp=" << trace->stall_warp);
D(3, stage_name << ": wspawn=" << trace->wspawn); D(4, stage_name << ": wspawn=" << trace->wspawn);
D(3, stage_name << ": stalled=" << trace->stalled); D(4, stage_name << ": stalled=" << trace->stalled);
} }
Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id) Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
@@ -79,8 +84,7 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
, decoder_(decoder) , decoder_(decoder)
, mem_(mem) , mem_(mem)
, steps_(0) , steps_(0)
, num_instructions_(0) { , num_insts_(0) {
release_warp_ = false;
foundSchedule_ = true; foundSchedule_ = true;
schedule_w_ = 0; schedule_w_ = 0;
@@ -98,23 +102,17 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
INIT_TRACE(inst_in_lsu_); INIT_TRACE(inst_in_lsu_);
INIT_TRACE(inst_in_wb_); INIT_TRACE(inst_in_wb_);
for (int i = 0; i < 32; i++) { iRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
stalled_warps_[i] = false; fRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
for (int j = 0; j < 32; j++) { vRenameTable_.resize(arch.num_regs(), false);
renameTable_[i][j] = true;
} stalled_warps_.resize(arch.num_warps(), false);
for (int i = 0; i < arch_.num_warps(); ++i) {
warps_.emplace_back(this, i);
} }
for (int i = 0; i < 32; i++) { warps_[0].setTmask(0, true);
vecRenameTable_[i] = true;
}
for (unsigned i = 0; i < arch_.getNumWarps(); ++i) {
warps_.push_back(Warp(this, i));
}
warps_[0].setActiveThreads(1);
warps_[0].setSpawned(true);
} }
Core::~Core() { Core::~Core() {
@@ -125,32 +123,20 @@ void Core::step() {
D(3, "###########################################################"); D(3, "###########################################################");
steps_++; steps_++;
D(3, "cycle: " << steps_); D(3, std::dec << "Core" << id_ << ": cycle: " << steps_);
DPH(3, "stalled warps:"); DPH(3, "stalled warps:");
for (ThdNum widd = 0; widd < arch_.getNumWarps(); widd++) { for (int i = 0; i < arch_.num_warps(); i++) {
DPN(3, " " << stalled_warps_[widd]); DPN(3, " " << stalled_warps_[i]);
} }
DPN(3, "\n"); DPN(3, "\n");
// cout << "About to call writeback" << std::endl;
this->writeback(); this->writeback();
// cout << "About to call load_store" << std::endl;
this->load_store(); this->load_store();
// cout << "About to call execute_unit" << std::endl;
this->execute_unit(); this->execute_unit();
// cout << "About to call scheduler" << std::endl;
this->scheduler(); this->scheduler();
// cout << "About to call decode" << std::endl;
this->decode(); this->decode();
// D(3, "About to call fetch" << std::flush);
this->fetch(); this->fetch();
// D(3, "Finished fetch" << std::flush);
if (release_warp_) {
release_warp_ = false;
stalled_warps_[release_warp_num_] = false;
}
DPN(3, std::flush); DPN(3, std::flush);
} }
@@ -161,10 +147,8 @@ void Core::warpScheduler() {
for (size_t wid = 0; wid < warps_.size(); ++wid) { for (size_t wid = 0; wid < warps_.size(); ++wid) {
// round robin scheduling // round robin scheduling
next_warp = (next_warp + 1) % warps_.size(); next_warp = (next_warp + 1) % warps_.size();
bool has_active_threads = warps_[next_warp].active();
bool has_active_threads = (warps_[next_warp].getActiveThreads() > 0);
bool stalled = stalled_warps_[next_warp]; bool stalled = stalled_warps_[next_warp];
if (has_active_threads && !stalled) { if (has_active_threads && !stalled) {
foundSchedule_ = true; foundSchedule_ = true;
break; break;
@@ -174,35 +158,28 @@ void Core::warpScheduler() {
} }
void Core::fetch() { void Core::fetch() {
// D(-1, "Found schedule: " << foundSchedule_);
if ((!inst_in_scheduler_.stalled) if ((!inst_in_scheduler_.stalled)
&& (inst_in_fetch_.fetch_stall_cycles == 0)) { && (inst_in_fetch_.fetch_stall_cycles == 0)) {
// CPY_TRACE(inst_in_decode_, inst_in_fetch_); INIT_TRACE(inst_in_fetch_);
// if (warps_[schedule_w_].activeThreads)
{
INIT_TRACE(inst_in_fetch_);
if (foundSchedule_) { if (foundSchedule_) {
auto active_threads_b = warps_[schedule_w_].getActiveThreads(); auto active_threads_b = warps_[schedule_w_].getActiveThreads();
num_insts_ = num_insts_ + warps_[schedule_w_].getActiveThreads();
num_instructions_ = num_instructions_ + warps_[schedule_w_].getActiveThreads(); warps_[schedule_w_].step(&inst_in_fetch_);
warps_[schedule_w_].step(&inst_in_fetch_);
auto active_threads_a = warps_[schedule_w_].getActiveThreads(); auto active_threads_a = warps_[schedule_w_].getActiveThreads();
if (active_threads_b != active_threads_a) { if (active_threads_b != active_threads_a) {
D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a); D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a);
} }
this->getCacheDelays(&inst_in_fetch_); this->getCacheDelays(&inst_in_fetch_);
if (inst_in_fetch_.stall_warp) { if (inst_in_fetch_.stall_warp) {
stalled_warps_[inst_in_fetch_.wid] = true; stalled_warps_[inst_in_fetch_.wid] = true;
}
} }
this->warpScheduler();
} }
this->warpScheduler();
} else { } else {
inst_in_fetch_.stalled = false; inst_in_fetch_.stalled = false;
if (inst_in_fetch_.fetch_stall_cycles > 0) if (inst_in_fetch_.fetch_stall_cycles > 0)
@@ -218,7 +195,6 @@ void Core::decode() {
CPY_TRACE(inst_in_decode_, inst_in_fetch_); CPY_TRACE(inst_in_decode_, inst_in_fetch_);
INIT_TRACE(inst_in_fetch_); INIT_TRACE(inst_in_fetch_);
} }
//printTrace(&inst_in_decode_, "Decode");
} }
void Core::scheduler() { void Core::scheduler() {
@@ -226,136 +202,162 @@ void Core::scheduler() {
CPY_TRACE(inst_in_scheduler_, inst_in_decode_); CPY_TRACE(inst_in_scheduler_, inst_in_decode_);
INIT_TRACE(inst_in_decode_); INIT_TRACE(inst_in_decode_);
} }
//printTrace(&inst_in_scheduler_, "Scheduler");
} }
void Core::load_store() { void Core::load_store() {
if ((inst_in_lsu_.mem_stall_cycles > 0) || (inst_in_lsu_.stalled)) { if ((inst_in_lsu_.mem_stall_cycles > 0) || inst_in_lsu_.stalled) {
// LSU currently busy // LSU currently busy
if ((inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)) { if ((inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)) {
inst_in_scheduler_.stalled = true; inst_in_scheduler_.stalled = true;
} }
} else { } else {
// LSU not busy if (!inst_in_scheduler_.is_lw && !inst_in_scheduler_.is_sw)
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) { return;
// Scheduler has LSU inst
bool scheduler_srcs_ready = true;
if (inst_in_scheduler_.rs1 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1];
}
if (inst_in_scheduler_.rs2 > 0) { // Scheduler has LSU inst
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2]; bool scheduler_srcs_busy = false;
}
if (inst_in_scheduler_.vs1 > 0) { if (inst_in_scheduler_.irs1 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1]; scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
} }
if (inst_in_scheduler_.vs2 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2];
}
if (scheduler_srcs_ready) { if (inst_in_scheduler_.irs2 > 0) {
if (inst_in_scheduler_.rd != -1) scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false; }
if (inst_in_scheduler_.rd != -1)
vecRenameTable_[inst_in_scheduler_.vd] = false; if (inst_in_scheduler_.frs1 >= 0) {
CPY_TRACE(inst_in_lsu_, inst_in_scheduler_); scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
INIT_TRACE(inst_in_scheduler_); }
} else {
inst_in_scheduler_.stalled = true; if (inst_in_scheduler_.frs2 >= 0) {
// INIT_TRACE(inst_in_lsu_); scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
} }
} else {
// INIT_TRACE(inst_in_lsu_); if (inst_in_scheduler_.frs3 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
}
if (inst_in_scheduler_.vrs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
}
if (inst_in_scheduler_.vrs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
}
if (scheduler_srcs_busy) {
inst_in_scheduler_.stalled = true;
} else {
if (inst_in_scheduler_.ird > 0)
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
if (inst_in_scheduler_.frd >= 0)
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
if (inst_in_scheduler_.vrd >= 0)
vRenameTable_[inst_in_scheduler_.vrd] = true;
CPY_TRACE(inst_in_lsu_, inst_in_scheduler_);
INIT_TRACE(inst_in_scheduler_);
} }
} }
if (inst_in_lsu_.mem_stall_cycles > 0) if (inst_in_lsu_.mem_stall_cycles > 0)
inst_in_lsu_.mem_stall_cycles--; inst_in_lsu_.mem_stall_cycles--;
//printTrace(&inst_in_lsu_, "LSU");
} }
void Core::execute_unit() { void Core::execute_unit() {
// EXEC is always not busy if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) { return;
// Not an execute instruction
// INIT_TRACE(inst_in_exe_); bool scheduler_srcs_busy = false;
} else {
bool scheduler_srcs_ready = true;
if (inst_in_scheduler_.rs1 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1];
// cout << "Rename RS1: " << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1] << " wid: " << inst_in_scheduler_.wid << '\n';
}
if (inst_in_scheduler_.rs2 > 0) { if (inst_in_scheduler_.irs1 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2]; scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
// cout << "Rename RS2: " << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2] << " wid: " << inst_in_scheduler_.wid << '\n';
}
// cout << "About to check vs*\n" << std::flush;
if (inst_in_scheduler_.vs1 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1];
}
if (inst_in_scheduler_.vs2 > 0) {
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2];
}
// cout << "Finished sources\n" << std::flush;
if (scheduler_srcs_ready) {
if (inst_in_scheduler_.rd != -1) {
// cout << "rename setting rd: " << inst_in_scheduler_.rd << " to not useabel wid: " << inst_in_scheduler_.wid << '\n';
renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false;
}
// cout << "About to check vector wb: " << inst_in_scheduler_.vd << "\n" << std::flush;
if (inst_in_scheduler_.vd != -1) {
vecRenameTable_[inst_in_scheduler_.vd] = false;
}
// cout << "Finished wb checking" << "\n" << std::flush;
CPY_TRACE(inst_in_exe_, inst_in_scheduler_);
INIT_TRACE(inst_in_scheduler_);
// cout << "Finished trace copying and clearning" << "\n" << std::flush;
} else {
D(3, "Execute: srcs not ready!");
inst_in_scheduler_.stalled = true;
// INIT_TRACE(inst_in_exe_);
}
} }
//printTrace(&inst_in_exe_, "EXE"); if (inst_in_scheduler_.irs2 > 0) {
// INIT_TRACE(inst_in_exe_); scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
}
if (inst_in_scheduler_.frs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
}
if (inst_in_scheduler_.frs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
}
if (inst_in_scheduler_.frs3 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
}
if (inst_in_scheduler_.vrs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
}
if (inst_in_scheduler_.vrs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
}
if (scheduler_srcs_busy) {
D(3, "Execute: srcs not ready!");
inst_in_scheduler_.stalled = true;
} else {
if (inst_in_scheduler_.ird > 0) {
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
}
if (inst_in_scheduler_.frd >= 0) {
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
}
if (inst_in_scheduler_.vrd >= 0) {
vRenameTable_[inst_in_scheduler_.vrd] = true;
}
CPY_TRACE(inst_in_exe_, inst_in_scheduler_);
INIT_TRACE(inst_in_scheduler_);
}
} }
void Core::writeback() { void Core::writeback() {
if (inst_in_wb_.rd > 0) if (inst_in_wb_.ird > 0) {
renameTable_[inst_in_wb_.wid][inst_in_wb_.rd] = true; iRenameTable_[inst_in_wb_.wid][inst_in_wb_.ird] = false;
if (inst_in_wb_.vd > 0) }
vecRenameTable_[inst_in_wb_.vd] = true;
if (inst_in_wb_.frd >= 0) {
fRenameTable_[inst_in_wb_.wid][inst_in_wb_.frd] = false;
}
if (inst_in_wb_.vrd >= 0) {
vRenameTable_[inst_in_wb_.vrd] = false;
}
if (inst_in_wb_.stall_warp) { if (inst_in_wb_.stall_warp) {
stalled_warps_[inst_in_wb_.wid] = false; stalled_warps_[inst_in_wb_.wid] = false;
// release_warp_ = true;
// release_warp_num_ = inst_in_wb_.wid;
} }
INIT_TRACE(inst_in_wb_); INIT_TRACE(inst_in_wb_);
bool serviced_exe = false; bool serviced_exe = false;
if ((inst_in_exe_.rd > 0) || (inst_in_exe_.stall_warp)) { if ((inst_in_exe_.ird > 0)
|| (inst_in_exe_.frd >= 0)
|| (inst_in_exe_.vrd >= 0)
|| (inst_in_exe_.stall_warp)) {
CPY_TRACE(inst_in_wb_, inst_in_exe_); CPY_TRACE(inst_in_wb_, inst_in_exe_);
INIT_TRACE(inst_in_exe_); INIT_TRACE(inst_in_exe_);
serviced_exe = true; serviced_exe = true;
// cout << "WRITEBACK SERVICED EXE\n";
} }
if (inst_in_lsu_.is_sw) { if (inst_in_lsu_.is_sw) {
INIT_TRACE(inst_in_lsu_); INIT_TRACE(inst_in_lsu_);
} else { } else {
if (((inst_in_lsu_.rd > 0) || (inst_in_lsu_.vd > 0)) && (inst_in_lsu_.mem_stall_cycles == 0)) { if (((inst_in_lsu_.ird > 0)
|| (inst_in_lsu_.frd >= 0)
|| (inst_in_lsu_.vrd >= 0))
&& (inst_in_lsu_.mem_stall_cycles == 0)) {
if (serviced_exe) { if (serviced_exe) {
D(3, "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used"); // Stalling LSU because EXE is busy
inst_in_lsu_.stalled = true; inst_in_lsu_.stalled = true;
} else { } else {
CPY_TRACE(inst_in_wb_, inst_in_lsu_); CPY_TRACE(inst_in_wb_, inst_in_lsu_);
@@ -366,27 +368,28 @@ void Core::writeback() {
} }
void Core::getCacheDelays(trace_inst_t *trace_inst) { void Core::getCacheDelays(trace_inst_t *trace_inst) {
trace_inst->fetch_stall_cycles += 3; trace_inst->fetch_stall_cycles += 1;
if (trace_inst->is_sw || trace_inst->is_lw) { if (trace_inst->is_sw || trace_inst->is_lw) {
trace_inst->mem_stall_cycles += 5; trace_inst->mem_stall_cycles += 3;
} }
} }
bool Core::running() const { bool Core::running() const {
bool stages_have_valid = inst_in_fetch_.valid_inst bool stages_have_valid = inst_in_fetch_.valid
|| inst_in_decode_.valid_inst || inst_in_decode_.valid
|| inst_in_scheduler_.valid_inst || inst_in_scheduler_.valid
|| inst_in_lsu_.valid_inst || inst_in_lsu_.valid
|| inst_in_exe_.valid_inst || inst_in_exe_.valid
|| inst_in_wb_.valid_inst; || inst_in_wb_.valid;
if (stages_have_valid) if (stages_have_valid)
return true; return true;
for (unsigned i = 0; i < warps_.size(); ++i) for (unsigned i = 0; i < warps_.size(); ++i) {
if (warps_[i].running()) { if (warps_[i].active()) {
return true; return true;
} }
}
return false; return false;
} }

View File

@@ -60,8 +60,8 @@ public:
return interruptEntry_; return interruptEntry_;
} }
unsigned long num_instructions() const { unsigned long num_insts() const {
return num_instructions_; return num_insts_;
} }
unsigned long num_steps() const { unsigned long num_steps() const {
@@ -70,9 +70,10 @@ public:
private: private:
bool renameTable_[32][32]; std::vector<std::vector<bool>> iRenameTable_;
bool vecRenameTable_[32]; std::vector<std::vector<bool>> fRenameTable_;
bool stalled_warps_[32]; std::vector<bool> vRenameTable_;
std::vector<bool> stalled_warps_;
bool foundSchedule_; bool foundSchedule_;
Word id_; Word id_;
@@ -83,10 +84,8 @@ private:
std::unordered_map<Word, std::set<Warp *>> barriers_; std::unordered_map<Word, std::set<Warp *>> barriers_;
int schedule_w_; int schedule_w_;
uint64_t steps_; uint64_t steps_;
uint64_t num_instructions_; uint64_t num_insts_;
Word interruptEntry_; Word interruptEntry_;
bool release_warp_;
int release_warp_num_;
trace_inst_t inst_in_fetch_; trace_inst_t inst_in_fetch_;
trace_inst_t inst_in_decode_; trace_inst_t inst_in_decode_;

View File

@@ -1,6 +1,8 @@
#pragma once #pragma once
//#define USE_DEBUG 9 #define USE_DEBUG 3
#define DEBUG_HEADER << "DEBUG "
//#define DEBUG_HEADER << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": "
#ifdef USE_DEBUG #ifdef USE_DEBUG
@@ -11,13 +13,13 @@
#define D(lvl, x) do { \ #define D(lvl, x) do { \
if ((lvl) <= USE_DEBUG) { \ if ((lvl) <= USE_DEBUG) { \
std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x << std::endl; \ std::cout DEBUG_HEADER << x << std::endl; \
} \ } \
} while(0) } while(0)
#define DPH(lvl, x) do { \ #define DPH(lvl, x) do { \
if ((lvl) <= USE_DEBUG) { \ if ((lvl) <= USE_DEBUG) { \
std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x; \ std::cout DEBUG_HEADER << x; \
} \ } \
} while(0) } while(0)
@@ -27,10 +29,6 @@
} \ } \
} while(0) } while(0)
#define D_RAW(x) do { \
std::cout << x; \
} while (0)
#else #else
#define DX(x) #define DX(x)

View File

@@ -54,7 +54,7 @@ std::ostream &vortex::operator<<(std::ostream &os, Instr &instr) {
} }
Decoder::Decoder(const ArchDef &arch) { Decoder::Decoder(const ArchDef &arch) {
inst_s_ = arch.getWordSize() * 8; inst_s_ = arch.wsize() * 8;
opcode_s_ = 7; opcode_s_ = 7;
reg_s_ = 5; reg_s_ = 5;
func2_s_ = 2; func2_s_ = 2;
@@ -94,7 +94,11 @@ Decoder::Decoder(const ArchDef &arch) {
v_imm_mask_ = 0x7ff; v_imm_mask_ = 0x7ff;
} }
std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, trace_inst_t *trace_inst) { std::shared_ptr<Instr> Decoder::decode(
const std::vector<Byte> &v,
Size &idx,
trace_inst_t *trace_inst)
{
Word code(readWord(v, idx, inst_s_ / 8)); Word code(readWord(v, idx, inst_s_ / 8));
// std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n"; // std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n";
@@ -107,12 +111,13 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5, Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5,
bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3; bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3;
InstType curInstType = sc_instTable.at(op).iType; // get current inst type InstType curInstType = sc_instTable.at(op).iType;
if (op == Opcode::FL || op == Opcode::FS) { // need to find out whether it is vector or floating point inst if (op == Opcode::FL || op == Opcode::FS) {
// need to find out whether it is vector or floating point inst
Word width_bits = (code >> shift_func3_) & func3_mask_; Word width_bits = (code >> shift_func3_) & func3_mask_;
if ((width_bits == 0x1) || (width_bits == 0x2) if ((width_bits == 0x1) || (width_bits == 0x2)
|| (width_bits == 0x3) || (width_bits == 0x4)) { || (width_bits == 0x3) || (width_bits == 0x4)) {
curInstType = (op == Opcode::FL)? InstType::I_TYPE : InstType::S_TYPE; curInstType = (op == Opcode::FL) ? InstType::I_TYPE : InstType::S_TYPE;
} }
} }
@@ -122,52 +127,50 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
break; break;
case InstType::R_TYPE: case InstType::R_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_); if (op == Opcode::FCI) {
instr->setSrcReg((code >> shift_rs1_) & reg_mask_); instr->setDestFReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_); instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
} else {
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
}
instr->setFunc3((code >> shift_func3_) & func3_mask_); instr->setFunc3((code >> shift_func3_) & func3_mask_);
instr->setFunc7((code >> shift_func7_) & func7_mask_); instr->setFunc7((code >> shift_func7_) & func7_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break; break;
case InstType::I_TYPE: case InstType::I_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_); if (op == Opcode::FCI || op == Opcode::FL) {
instr->setSrcReg((code >> shift_rs1_) & reg_mask_); instr->setDestFReg((code >> shift_rd_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
} else {
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
}
instr->setFunc7((code >> shift_func7_) & func7_mask_); instr->setFunc7((code >> shift_func7_) & func7_mask_);
func3 = (code >> shift_func3_) & func3_mask_; func3 = (code >> shift_func3_) & func3_mask_;
instr->setFunc3(func3); instr->setFunc3(func3);
if ((func3 == 5) && (op != L_INST) && (op != Opcode::FL)) {
if ((func3 == 5) && (op != L_INST) && (op != FL)) {
// std::cout << "func7: " << func7 << "\n";
instr->setSrcImm(signExt(((code >> shift_rs2_) & reg_mask_), 5, reg_mask_)); instr->setSrcImm(signExt(((code >> shift_rs2_) & reg_mask_), 5, reg_mask_));
} else { } else {
instr->setSrcImm(signExt(code >> shift_i_immed_, 12, i_imm_mask_)); instr->setSrcImm(signExt(code >> shift_i_immed_, 12, i_imm_mask_));
} }
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break; break;
case InstType::S_TYPE: case InstType::S_TYPE:
// std::cout << "************STORE\n"; if (op == Opcode::FS) {
instr->setSrcReg((code >> shift_rs1_) & reg_mask_); instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_); instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
} else {
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
}
instr->setFunc3((code >> shift_func3_) & func3_mask_); instr->setFunc3((code >> shift_func3_) & func3_mask_);
dest_bits = (code >> shift_rd_) & reg_mask_; dest_bits = (code >> shift_rd_) & reg_mask_;
imm_bits = (code >> shift_s_b_immed_ & func7_mask_); imm_bits = (code >> shift_s_b_immed_ & func7_mask_);
imeed = (imm_bits << reg_s_) | dest_bits; imeed = (imm_bits << reg_s_) | dest_bits;
// std::cout << "ENC: store imeed: " << imeed << "\n";
instr->setSrcImm(signExt(imeed, 12, s_imm_mask_)); instr->setSrcImm(signExt(imeed, 12, s_imm_mask_));
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
break; break;
case InstType::B_TYPE: case InstType::B_TYPE:
@@ -184,51 +187,34 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
bit_12 = imm_bits >> 6; bit_12 = imm_bits >> 6;
imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12); imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
instr->setSrcImm(signExt(imeed, 13, b_imm_mask_)); instr->setSrcImm(signExt(imeed, 13, b_imm_mask_));
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
break; break;
case InstType::U_TYPE: case InstType::U_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_); instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcImm(signExt(code >> shift_j_u_immed_, 20, u_imm_mask_)); instr->setSrcImm(signExt(code >> shift_j_u_immed_, 20, u_imm_mask_));
trace_inst->valid_inst = true;
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break; break;
case InstType::J_TYPE: case InstType::J_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_); instr->setDestReg((code >> shift_rd_) & reg_mask_);
// [20 | 10:1 | 11 | 19:12]
unordered = code >> shift_j_u_immed_; unordered = code >> shift_j_u_immed_;
bits_19_12 = unordered & 0xff; bits_19_12 = unordered & 0xff;
bit_11 = (unordered >> 8) & 0x1; bit_11 = (unordered >> 8) & 0x1;
bits_10_1 = (unordered >> 9) & 0x3ff; bits_10_1 = (unordered >> 9) & 0x3ff;
bit_20 = (unordered >> 19) & 0x1; bit_20 = (unordered >> 19) & 0x1;
imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20); imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
if (bit_20) { if (bit_20) {
imeed |= ~j_imm_mask_; imeed |= ~j_imm_mask_;
} }
instr->setSrcImm(imeed); instr->setSrcImm(imeed);
trace_inst->valid_inst = true;
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break; break;
case InstType::V_TYPE: case InstType::V_TYPE:
D(3, "Entered here: instr type = vector" << op); D(3, "Entered here: instr type = vector" << op);
switch (op) { switch (op) {
case Opcode::VSET_ARITH: //TODO: arithmetic ops case Opcode::VSET_ARITH: //TODO: arithmetic ops
instr->setDestReg((code >> shift_rd_) & reg_mask_); instr->setDestVReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_); instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
func3 = (code >> shift_func3_) & func3_mask_; func3 = (code >> shift_func3_) & func3_mask_;
instr->setFunc3(func3); instr->setFunc3(func3);
D(3, "Entered here: instr type = vector"); D(3, "Entered here: instr type = vector");
@@ -247,53 +233,34 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
instr->setVsew((immed >> 2) & 0x3); instr->setVsew((immed >> 2) & 0x3);
D(3, "sew " << ((immed >> 2) & 0x3)); D(3, "sew " << ((immed >> 2) & 0x3));
} else { } else {
instr->setSrcReg((code >> shift_rs2_) & reg_mask_); instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
} }
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
} else { } else {
instr->setSrcReg((code >> shift_rs2_) & reg_mask_); instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setVmask((code >> shift_vmask_) & 0x1); instr->setVmask((code >> shift_vmask_) & 0x1);
instr->setFunc6((code >> shift_func6_) & func6_mask_); instr->setFunc6((code >> shift_func6_) & func6_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
} }
break; break;
case Opcode::VL: case Opcode::VL:
D(3, "vector load instr"); D(3, "vector load instr");
instr->setDestReg((code >> shift_rd_) & reg_mask_); instr->setDestVReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_); instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
instr->setVlsWidth((code >> shift_func3_) & func3_mask_); instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_); instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setVmask((code >> shift_vmask_)); instr->setVmask((code >> shift_vmask_));
instr->setVmop((code >> shift_vmop_) & func3_mask_); instr->setVmop((code >> shift_vmop_) & func3_mask_);
instr->setVnf((code >> shift_vnf_) & func3_mask_); instr->setVnf((code >> shift_vnf_) & func3_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->vd = ((code >> shift_rd_) & reg_mask_);
//trace_inst->vs2 = ((code>>shift_rs2_) & reg_mask_);
break; break;
case Opcode::VS: case Opcode::VS:
instr->setVs3((code >> shift_rd_) & reg_mask_); instr->setVs3((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_); instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
instr->setVlsWidth((code >> shift_func3_) & func3_mask_); instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_); instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setVmask((code >> shift_vmask_)); instr->setVmask((code >> shift_vmask_));
instr->setVmop((code >> shift_vmop_) & func3_mask_); instr->setVmop((code >> shift_vmop_) & func3_mask_);
instr->setVnf((code >> shift_vnf_) & func3_mask_); instr->setVnf((code >> shift_vnf_) & func3_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
//trace_inst->vd = ((code>>shift_rd_) & reg_mask_);
trace_inst->vs1 = ((code >> shift_rd_) & reg_mask_); //vs3
break; break;
default: default:
@@ -303,23 +270,47 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
break; break;
case R4_TYPE: case R4_TYPE:
// RT: add R4_TYPE decoder // RT: add R4_TYPE decoder
instr->setDestReg((code >> shift_rd_) & reg_mask_); instr->setDestFReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_); instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_); instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcReg((code >> shift_rs3_) & reg_mask_); instr->setSrcFReg((code >> shift_rs3_) & reg_mask_);
instr->setFunc3((code >> shift_func3_) & func3_mask_); instr->setFunc3((code >> shift_func3_) & func3_mask_);
trace_inst->valid_inst = true;
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
trace_inst->rs3 = ((code >> shift_rs3_) & reg_mask_);
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
break; break;
default: default:
std::cout << "Unrecognized argument class in word decoder.\n"; std::cout << "Unrecognized argument class in word decoder.\n";
std::abort(); std::abort();
} }
if (curInstType != InstType::N_TYPE) {
trace_inst->valid = true;
if (instr->hasRDest()) {
if (instr->is_FpDest()) {
trace_inst->frd = instr->getRDest();
} else if (instr->is_VDest()) {
trace_inst->vrd = instr->getRDest();
} else {
trace_inst->ird = instr->getRDest();
}
}
for (int i = 0; i < instr->getNRSrc(); ++i) {
if (instr->is_FpSrc(i)) {
if (i == 0) trace_inst->frs1 = instr->getRSrc(i);
else if (i == 1) trace_inst->frs2 = instr->getRSrc(i);
else if (i == 2) trace_inst->frs3 = instr->getRSrc(i);
else std::abort();
} else if (instr->is_VSrc(i)) {
if (i == 0) trace_inst->vrs1 = instr->getRSrc(i);
else if (i == 1) trace_inst->vrs2 = instr->getRSrc(i);
else std::abort();
} else {
if (i == 0) trace_inst->irs1 = instr->getRSrc(i);
else if (i == 1) trace_inst->irs2 = instr->getRSrc(i);
else std::abort();
}
}
}
D(2, "Decoded instr 0x" << std::hex << code << " into: " << instr << std::flush); D(2, "Decoded instr 0x" << std::hex << code << " into: " << instr << std::flush);
return instr; return instr;

File diff suppressed because it is too large Load Diff

View File

@@ -1,23 +0,0 @@
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "instr.h"
using namespace vortex;
// Stores 2^lmul in vlmul_.
// Computed with an integer shift rather than std::pow so the result is exact
// and never round-trips through floating point.
// NOTE(review): assumes lmul is smaller than the bit width of Word — confirm
// that the decoder masks the field before calling this.
void Instr::setVlmul(Word lmul) {
  vlmul_ = static_cast<Word>(1) << lmul;
}
// Stores 2^(3 + sew) in vsew_ (sew == 0 yields 8).
// Computed with an integer shift rather than std::pow so the result is exact
// and never round-trips through floating point.
// NOTE(review): assumes 3 + sew is smaller than the bit width of Word —
// confirm that the decoder masks the field before calling this.
void Instr::setVsew(Word sew) {
  vsew_ = static_cast<Word>(1) << (3 + sew);
}
// Stores 2^ediv in vediv_.
// Computed with an integer shift rather than std::pow so the result is exact
// and never round-trips through floating point.
// NOTE(review): assumes ediv is smaller than the bit width of Word — confirm
// that the decoder masks the field before calling this.
void Instr::setVediv(Word ediv) {
  vediv_ = static_cast<Word>(1) << ediv;
}

View File

@@ -52,9 +52,12 @@ public:
Instr() Instr()
: opcode_(Opcode::NOP) : opcode_(Opcode::NOP)
, nRsrc_(0) , nRsrc_(0)
, nPsrc_(0)
, hasImmSrc_(false) , hasImmSrc_(false)
, hasRDest_(false) , hasRDest_(false)
, is_FpDest_(false)
, is_VDest_(false)
, is_FpSrc_(0)
, is_VSrc_(0)
, func2_(0) , func2_(0)
, func3_(0) , func3_(0)
, func7_(0) , func7_(0)
@@ -65,20 +68,24 @@ public:
/* Setters used to "craft" the instruction. */ /* Setters used to "craft" the instruction. */
void setOpcode(Opcode opcode) { opcode_ = opcode; } void setOpcode(Opcode opcode) { opcode_ = opcode; }
void setDestReg(RegNum destReg) { hasRDest_ = true; rdest_ = destReg; } void setDestReg(int destReg) { hasRDest_ = true; rdest_ = destReg; }
void setSrcReg(RegNum srcReg) { rsrc_[nRsrc_++] = srcReg; } void setSrcReg(int srcReg) { rsrc_[nRsrc_++] = srcReg; }
void setDestFReg(int destReg) { hasRDest_ = true; is_FpDest_ = true; rdest_ = destReg; }
void setSrcFReg(int srcReg) { is_FpSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
void setDestVReg(int destReg) { hasRDest_ = true; is_VDest_ = true; rdest_ = destReg; }
void setSrcVReg(int srcReg) { is_VSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
void setFunc3(Word func3) { func3_ = func3; } void setFunc3(Word func3) { func3_ = func3; }
void setFunc7(Word func7) { func7_ = func7; } void setFunc7(Word func7) { func7_ = func7; }
void setSrcImm(Word srcImm) { hasImmSrc_ = true; immsrc_ = srcImm; } void setSrcImm(Word srcImm) { hasImmSrc_ = true; immsrc_ = srcImm; }
void setVsetImm(Word vset_imm) { if(vset_imm) vsetImm_ = true; else vsetImm_ = false; } void setVsetImm(Word vset_imm) { if (vset_imm) vsetImm_ = true; else vsetImm_ = false; }
void setVlsWidth(Word width) { vlsWidth_ = width; } void setVlsWidth(Word width) { vlsWidth_ = width; }
void setVmop(Word mop) { vMop_ = mop; } void setVmop(Word mop) { vMop_ = mop; }
void setVnf(Word nf) { vNf_ = nf; } void setVnf(Word nf) { vNf_ = nf; }
void setVmask(Word mask) { vmask_ = mask; } void setVmask(Word mask) { vmask_ = mask; }
void setVs3(Word vs) { vs3_ = vs; } void setVs3(Word vs) { vs3_ = vs; }
void setVlmul(Word lmul); void setVlmul(Word lmul) { vlmul_ = 1 << lmul; }
void setVsew(Word sew); void setVsew(Word sew) { vsew_ = 1 << (3+sew); }
void setVediv(Word ediv); void setVediv(Word ediv) { vediv_ = 1 << ediv; }
void setFunc6(Word func6) { func6_ = func6; } void setFunc6(Word func6) { func6_ = func6; }
/* Getters used by encoders. */ /* Getters used by encoders. */
@@ -86,10 +93,10 @@ public:
Word getFunc3() const { return func3_; } Word getFunc3() const { return func3_; }
Word getFunc6() const { return func6_; } Word getFunc6() const { return func6_; }
Word getFunc7() const { return func7_; } Word getFunc7() const { return func7_; }
RegNum getNRSrc() const { return nRsrc_; } int getNRSrc() const { return nRsrc_; }
RegNum getRSrc(RegNum i) const { return rsrc_[i]; } int getRSrc(int i) const { return rsrc_[i]; }
bool hasRDest() const { return hasRDest_; } bool hasRDest() const { return hasRDest_; }
RegNum getRDest() const { return rdest_; } int getRDest() const { return rdest_; }
bool hasImm() const { return hasImmSrc_; } bool hasImm() const { return hasImmSrc_; }
Word getImm() const { return immsrc_; } Word getImm() const { return immsrc_; }
bool getVsetImm() const { return vsetImm_; } bool getVsetImm() const { return vsetImm_; }
@@ -102,6 +109,12 @@ public:
Word getVsew() const { return vsew_; } Word getVsew() const { return vsew_; }
Word getVediv() const { return vediv_; } Word getVediv() const { return vediv_; }
bool is_FpDest() const { return is_FpDest_; }
bool is_FpSrc(int i) const { return (is_FpSrc_ >> i) & 0x1; }
bool is_VDest() const { return is_VDest_; }
bool is_VSrc(int i) const { return (is_VSrc_ >> i) & 0x1; }
private: private:
enum { enum {
@@ -110,15 +123,18 @@ private:
Opcode opcode_; Opcode opcode_;
int nRsrc_; int nRsrc_;
int nPsrc_;
bool hasImmSrc_; bool hasImmSrc_;
bool hasRDest_; bool hasRDest_;
bool is_FpDest_;
bool is_VDest_;
int is_FpSrc_;
int is_VSrc_;
Word immsrc_; Word immsrc_;
Word func2_; Word func2_;
Word func3_; Word func3_;
Word func7_; Word func7_;
RegNum rsrc_[MAX_REG_SOURCES]; int rsrc_[MAX_REG_SOURCES];
RegNum rdest_; int rdest_;
//Vector //Vector
bool vsetImm_; bool vsetImm_;

View File

@@ -15,8 +15,8 @@ using namespace vortex;
int main(int argc, char **argv) { int main(int argc, char **argv) {
std::string archString("rv32i"); std::string archString("rv32imf");
int num_cores(1); int num_cores(NUM_CORES * NUM_CLUSTERS);
int num_warps(NUM_WARPS); int num_warps(NUM_WARPS);
int num_threads(NUM_THREADS); int num_threads(NUM_THREADS);
std::string imgFileName; std::string imgFileName;
@@ -48,7 +48,7 @@ int main(int argc, char **argv) {
ArchDef arch(archString, num_cores, num_warps, num_threads); ArchDef arch(archString, num_cores, num_warps, num_threads);
Decoder decoder(arch); Decoder decoder(arch);
MemoryUnit mu(4096, arch.getWordSize(), true); MemoryUnit mu(4096, arch.wsize(), true);
RAM old_ram; RAM old_ram;
old_ram.loadHexImpl(imgFileName.c_str()); old_ram.loadHexImpl(imgFileName.c_str());
@@ -59,7 +59,7 @@ int main(int argc, char **argv) {
std::vector<std::shared_ptr<Core>> cores(num_cores); std::vector<std::shared_ptr<Core>> cores(num_cores);
for (int i = 0; i < num_cores; ++i) { for (int i = 0; i < num_cores; ++i) {
cores[i] = std::make_shared<Core>(arch, decoder, mu); cores[i] = std::make_shared<Core>(arch, decoder, mu, i);
} }
bool running; bool running;

View File

@@ -1,78 +0,0 @@
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <fstream>
#include <stdlib.h>
#include <sys/stat.h>
#include "debug.h"
#include "types.h"
#include "core.h"
#include "args.h"
using namespace vortex;
int main(int argc, char **argv) {
std::string archString("rv32i");
int num_cores(1);
int num_warps(NUM_WARPS);
int num_threads(NUM_THREADS);
std::string imgFileName;
bool showHelp(false);
bool showStats(false);
/* Read the command line arguments. */
CommandLineArgFlag fh("-h", "--help", "", showHelp);
CommandLineArgSetter<std::string> fa("-a", "--arch", "", archString);
CommandLineArgSetter<std::string> fi("-i", "--image", "", imgFileName);
CommandLineArgSetter<int> fc("-c", "--cores", "", num_cores);
CommandLineArgSetter<int> fw("-w", "--warps", "", num_warps);
CommandLineArgSetter<int> ft("-t", "--threads", "", num_threads);
CommandLineArgFlag fs("-s", "--stats", "", showStats);
CommandLineArg::readArgs(argc - 1, argv + 1);
if (showHelp || imgFileName.empty()) {
std::cout << "Vortex emulator command line arguments:\n"
" -i, --image <filename> Program RAM image\n"
" -c, --cores <num> Number of cores\n"
" -w, --warps <num> Number of warps\n"
" -t, --threads <num> Number of threads\n"
" -a, --arch <arch string> Architecture string\n"
" -s, --stats Print stats on exit.\n";
return 0;
}
ArchDef arch(archString, num_cores, num_warps, num_threads);
Decoder decoder(arch);
MemoryUnit mu(4096, arch.getWordSize(), true);
RAM old_ram;
old_ram.loadHexImpl(imgFileName.c_str());
mu.attach(old_ram, 0);
struct stat hello;
fstat(0, &hello);
std::vector<std::shared_ptr<Core>> cores(num_cores);
for (int i = 0; i < num_cores; ++i) {
cores[i] = std::make_shared<Core>(arch, decoder, mu);
}
bool running;
do {
running = false;
for (int i = 0; i < num_cores; ++i) {
if (!cores[i]->running())
continue;
running = true;
cores[i]->step();
}
} while (running);
return 0;
}

View File

@@ -5,22 +5,27 @@ namespace vortex {
struct trace_inst_t { struct trace_inst_t {
// Warp step // Warp step
bool valid_inst; bool valid;
unsigned pc; unsigned PC;
// Core scheduler // Core scheduler
int wid; int wid;
// Encoder // Encoder
int rs1; int irs1;
int rs2; int irs2;
int rs3; int ird;
int rd;
//Encoder // Floating-point
int vs1; int frs1;
int vs2; int frs2;
int vd; int frs3;
int frd;
// Vector extension
int vrs1;
int vrs2;
int vrd;
// Instruction execute // Instruction execute
bool is_lw; bool is_lw;

View File

@@ -1,20 +1,18 @@
#pragma once #pragma once
#include <stdint.h> #include <stdint.h>
#include <bitset>
#include <VX_config.h> #include <VX_config.h>
namespace vortex { namespace vortex {
typedef uint8_t Byte; typedef uint8_t Byte;
typedef uint32_t Word; typedef uint32_t Word;
typedef uint32_t Word_u;
typedef int32_t Word_s;
typedef Word_u Addr; typedef uint32_t Addr;
typedef Word_u Size; typedef uint32_t Size;
typedef unsigned RegNum; typedef std::bitset<32> ThreadMask;
typedef unsigned ThdNum;
enum MemFlags { enum MemFlags {
RD_USR = 1, RD_USR = 1,

View File

@@ -12,15 +12,15 @@ Word vortex::signExt(Word w, Size bit, Word mask) {
return w; return w;
} }
void vortex::wordToBytes(Byte *b, Word_u w, Size wordSize) { void vortex::wordToBytes(Byte *b, Word w, Size wordSize) {
while (wordSize--) { while (wordSize--) {
*(b++) = w & 0xff; *(b++) = w & 0xff;
w >>= 8; w >>= 8;
} }
} }
Word_u vortex::bytesToWord(const Byte *b, Size wordSize) { Word vortex::bytesToWord(const Byte *b, Size wordSize) {
Word_u w = 0; Word w = 0;
b += wordSize-1; b += wordSize-1;
while (wordSize--) { while (wordSize--) {
w <<= 8; w <<= 8;
@@ -29,15 +29,15 @@ Word_u vortex::bytesToWord(const Byte *b, Size wordSize) {
return w; return w;
} }
Word_u vortex::flagsToWord(bool r, bool w, bool x) { Word vortex::flagsToWord(bool r, bool w, bool x) {
Word_u word = 0; Word word = 0;
if (r) word |= RD_USR; if (r) word |= RD_USR;
if (w) word |= WR_USR; if (w) word |= WR_USR;
if (x) word |= EX_USR; if (x) word |= EX_USR;
return word; return word;
} }
void vortex::wordToFlags(bool &r, bool &w, bool &x, Word_u f) { void vortex::wordToFlags(bool &r, bool &w, bool &x, Word f) {
r = f & RD_USR; r = f & RD_USR;
w = f & WR_USR; w = f & WR_USR;
x = f & EX_USR; x = f & EX_USR;
@@ -49,10 +49,10 @@ Byte vortex::readByte(const std::vector<Byte> &b, Size &n) {
return b[n++]; return b[n++];
} }
Word_u vortex::readWord(const std::vector<Byte> &b, Size &n, Size wordSize) { Word vortex::readWord(const std::vector<Byte> &b, Size &n, Size wordSize) {
if (b.size() - n < wordSize) if (b.size() - n < wordSize)
throw std::out_of_range("out of range"); throw std::out_of_range("out of range");
Word_u w(0); Word w(0);
n += wordSize; n += wordSize;
// std::cout << "wordSize: " << wordSize << "\n"; // std::cout << "wordSize: " << wordSize << "\n";
for (Size i = 0; i < wordSize; i++) { for (Size i = 0; i < wordSize; i++) {

View File

@@ -12,13 +12,13 @@ void unused(Args&&...) {}
Word signExt(Word w, Size bit, Word mask); Word signExt(Word w, Size bit, Word mask);
Word_u bytesToWord(const Byte *b, Size wordSize); Word bytesToWord(const Byte *b, Size wordSize);
void wordToBytes(Byte *b, Word_u w, Size wordSize); void wordToBytes(Byte *b, Word w, Size wordSize);
Word_u flagsToWord(bool r, bool w, bool x); Word flagsToWord(bool r, bool w, bool x);
void wordToFlags(bool &r, bool &w, bool &x, Word_u f); void wordToFlags(bool &r, bool &w, bool &x, Word f);
Byte readByte(const std::vector<Byte> &b, Size &n); Byte readByte(const std::vector<Byte> &b, Size &n);
Word_u readWord(const std::vector<Byte> &b, Size &n, Size wordSize); Word readWord(const std::vector<Byte> &b, Size &n, Size wordSize);
void writeByte(std::vector<Byte> &p, Size &n, Byte b); void writeByte(std::vector<Byte> &p, Size &n, Byte b);
void writeWord(std::vector<Byte> &p, Size &n, Size wordSize, Word w); void writeWord(std::vector<Byte> &p, Size &n, Size wordSize, Word w);

View File

@@ -2,6 +2,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h> #include <unistd.h>
#include <math.h> #include <math.h>
#include <assert.h>
#include "util.h" #include "util.h"
#include "instr.h" #include "instr.h"
@@ -11,87 +12,67 @@ using namespace vortex;
Warp::Warp(Core *core, Word id) Warp::Warp(Core *core, Word id)
: id_(id) : id_(id)
, active_(false)
, core_(core) , core_(core)
, pc_(0x80000000) , PC_(0x80000000)
, shadowPc_(0)
, activeThreads_(0)
, shadowActiveThreads_(0)
, shadowIReg_(core_->arch().getNumRegs())
, VLEN_(1024)
, spawned_(false)
, steps_(0) , steps_(0)
, insts_(0) , insts_(0)
, loads_(0) , loads_(0)
, stores_(0) { , stores_(0) {
D(3, "Creating a new thread with PC: " << std::hex << pc_);
/* Build the register file. */
Word regNum(0);
for (Word j = 0; j < core_->arch().getNumThreads(); ++j) {
iRegFile_.push_back(std::vector<Reg<Word>>(0));
for (Word i = 0; i < core_->arch().getNumRegs(); ++i) {
iRegFile_[j].push_back(Reg<Word>(id, regNum++));
}
bool act = false; tmask_.reset();
if (j == 0)
act = true;
tmask_.push_back(act);
shadowTmask_.push_back(act);
}
for (Word i = 0; i < (1 << 12); i++) { iRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
csrs_.push_back(Reg<uint32_t>(id, regNum++)); fRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
} vRegFile_.resize(core_->arch().num_regs(), std::vector<Byte>(core_->arch().vsize(), 0));
csrs_.resize(core_->arch().num_csrs());
/* Set initial register contents. */
iRegFile_[0][0] = (core_->arch().getNumThreads() << (core_->arch().getWordSize() * 8 / 2)) | id;
} }
void Warp::step(trace_inst_t *trace_inst) { void Warp::step(trace_inst_t *trace_inst) {
assert(tmask_.any());
Size fetchPos(0); Size fetchPos(0);
Size decPos; Size decPos;
Size wordSize(core_->arch().getWordSize()); Size wordSize(core_->arch().wsize());
std::vector<Byte> fetchBuffer(wordSize); std::vector<Byte> fetchBuffer(wordSize);
if (activeThreads_ == 0)
return;
++steps_; ++steps_;
D(3, "current PC=0x" << std::hex << pc_); D(3, "current PC=0x" << std::hex << PC_);
// std::cout << "pc: " << std::hex << pc << "\n"; // std::cout << "PC: " << std::hex << PC << "\n";
trace_inst->pc = pc_; trace_inst->PC = PC_;
/* Fetch and decode. */ /* Fetch and decode. */
if (wordSize < sizeof(pc_)) if (wordSize < sizeof(PC_))
pc_ &= ((1ll << (wordSize * 8)) - 1); PC_ &= ((1ll << (wordSize * 8)) - 1);
unsigned fetchSize = 4; unsigned fetchSize = 4;
fetchBuffer.resize(fetchSize); fetchBuffer.resize(fetchSize);
Word fetched = core_->mem().fetch(pc_ + fetchPos, 0); Word fetched = core_->mem().fetch(PC_ + fetchPos, 0);
writeWord(fetchBuffer, fetchPos, fetchSize, fetched); writeWord(fetchBuffer, fetchPos, fetchSize, fetched);
decPos = 0; decPos = 0;
std::shared_ptr<Instr> instr = core_->decoder().decode(fetchBuffer, decPos, trace_inst); std::shared_ptr<Instr> instr = core_->decoder().decode(fetchBuffer, decPos, trace_inst);
// Update pc // Update PC
pc_ += decPos; PC_ += decPos;
// Execute // Execute
this->execute(*instr, trace_inst); this->execute(*instr, trace_inst);
// At Debug Level 3, print debug info after each instruction. // At Debug Level 3, print debug info after each instruction.
D(3, "Register state:"); D(4, "Register state:");
for (unsigned i = 0; i < iRegFile_[0].size(); ++i) { for (int i = 0; i < core_->arch().num_regs(); ++i) {
D_RAW(" %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':'); DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
for (unsigned j = 0; j < (activeThreads_); ++j) for (int j = 0; j < core_->arch().num_threads(); ++j) {
D_RAW(' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' '); DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' ');
D_RAW('(' << shadowIReg_[i] << ')' << std::endl); }
DPN(4, std::endl);
} }
DPH(3, "Thread mask:"); DPH(3, "Thread mask:");
for (unsigned i = 0; i < tmask_.size(); ++i) for (int i = 0; i < core_->arch().num_threads(); ++i)
DPN(3, " " << tmask_[i]); DPN(3, " " << tmask_[i]);
DPN(3, "\n"); DPN(3, "\n");
} }

View File

@@ -7,69 +7,25 @@
namespace vortex { namespace vortex {
// Register cell holding one value of type T together with the id of its owner
// (cpuId_) and its register number (regNum_).
//
// Writes through operator= are ignored when regNum_ is 0, so a cell numbered 0
// keeps the value it was constructed with.
template <typename T>
class Reg {
public:
  // Zero-valued cell with owner 0 and register number 0.
  Reg()
    : value_(0), cpuId_(0), regNum_(0) {}

  // Zero-valued cell for owner `c`, register number `n`.
  Reg(Word c, Word n)
    : value_(0), cpuId_(c), regNum_(n) {}

  // Cell for owner `c`, register number `n`, initialised to `v`.
  Reg(Word c, Word n, T v)
    : value_(v), cpuId_(c), regNum_(n) {}

  // Direct read access that bypasses the doRead() hook.
  const T &value() const {
    return value_;
  }

  // Stores `r` and fires the doWrite() hook, unless this is register number 0,
  // in which case the write is silently dropped.
  Reg &operator=(T r) {
    if (regNum_) {
      value_ = r;
      doWrite();
    }
    return *this;
  }

  // Reads the value, firing the doRead() hook first.
  operator T() const {
    doRead();
    return value_;
  }

  // Keeps only the low `s` bytes of the stored value.
  //
  // The mask is built in Word width. The previous code started from the 64-bit
  // constant ~0ull, so after narrowing to a 32-bit Word the mask was always all
  // ones and nothing was ever cleared.
  // `s` >= sizeof(Word) leaves the value untouched; `s` == 0 clears it.
  void trunc(Size s) {
    if (s >= sizeof(Word))
      return;
    Word mask = 0;
    if (s > 0) {
      // Shift count is in [8, 8 * (sizeof(Word) - 1)], always below the width.
      mask = static_cast<Word>(~static_cast<Word>(0)) >> ((sizeof(Word) - s) * 8);
    }
    value_ &= mask;
  }

private:
  T value_;
  Word cpuId_, regNum_;

  // Access hooks; currently no-ops.
  void doWrite() const {}
  void doRead() const {}
};
///////////////////////////////////////////////////////////////////////////////
struct DomStackEntry { struct DomStackEntry {
DomStackEntry( DomStackEntry(const ThreadMask &tmask, Word PC)
unsigned p, : tmask(tmask)
const std::vector<std::vector<Reg<Word>>> &m, , PC(PC)
std::vector<bool> &tm, , fallThrough(false)
Word pc , unanimous(false)
) : pc(pc) {}
, fallThrough(false)
, uni(false) {
for (unsigned i = 0; i < m.size(); ++i) {
tmask.push_back(!bool(m[i][p]) && tm[i]);
}
}
DomStackEntry(const std::vector<bool> &tmask) DomStackEntry(const ThreadMask &tmask)
: tmask(tmask), fallThrough(true), uni(false) {} : tmask(tmask)
, PC(0)
, fallThrough(true)
, unanimous(false)
{}
std::vector<bool> tmask; ThreadMask tmask;
Word pc; Word PC;
bool fallThrough; bool fallThrough;
bool uni; bool unanimous;
}; };
struct vtype { struct vtype {
@@ -86,11 +42,13 @@ class trace_inst_t;
class Warp { class Warp {
public: public:
Warp(Core *core, Word id = 0); Warp(Core *core, Word id = 0);
void step(trace_inst_t *);
bool running() const { bool active() const {
return (activeThreads_ != 0); return tmask_.any();
}
std::size_t getActiveThreads() const {
return tmask_.count();
} }
void printStats() const; void printStats() const;
@@ -103,68 +61,40 @@ public:
return id_; return id_;
} }
Word get_pc() const { Word getPC() const {
return pc_; return PC_;
} }
void set_pc(Word pc) { void setPC(Word PC) {
pc_ = pc; PC_ = PC;
}
void setActiveThreads(Size activeThreads) {
activeThreads_ = activeThreads;
}
Size getActiveThreads() const {
return activeThreads_;
}
void setSpawned(bool spawned) {
spawned_ = spawned;
} }
void setTmask(size_t index, bool value) { void setTmask(size_t index, bool value) {
tmask_[index] = value; tmask_[index] = value;
} }
void step(trace_inst_t *);
private: private:
void execute(Instr &instr, trace_inst_t *); void execute(Instr &instr, trace_inst_t *);
struct MemAccess {
MemAccess(bool w, Word a)
: wr(w), addr(a) {}
bool wr;
Word addr;
};
std::vector<MemAccess> memAccesses_;
Word id_; Word id_;
bool active_;
Core *core_; Core *core_;
Word pc_;
Word shadowPc_; Word PC_;
Size activeThreads_; ThreadMask tmask_;
Size shadowActiveThreads_;
std::vector<std::vector<Reg<Word>>> iRegFile_; std::vector<std::vector<Word>> iRegFile_;
std::vector<std::vector<Reg<Word>>> fRegFile_; std::vector<std::vector<Word>> fRegFile_;
std::vector<Reg<uint32_t>> csrs_; std::vector<std::vector<Byte>> vRegFile_;
std::vector<Word> csrs_;
std::vector<bool> tmask_;
std::vector<bool> shadowTmask_;
std::stack<DomStackEntry> domStack_; std::stack<DomStackEntry> domStack_;
std::vector<Word> shadowIReg_; struct vtype vtype_;
std::vector<Word> shadowFReg_; int vl_;
struct vtype vtype_; // both of them are XLEN WIDE
int vl_; // both of them are XLEN WIDE
Word VLEN_; // total vector length
std::vector<std::vector<Reg<char *>>> vregFile_; // 32 vector registers
bool spawned_;
unsigned long steps_; unsigned long steps_;
unsigned long insts_; unsigned long insts_;
unsigned long loads_; unsigned long loads_;