simX floating-point fixes and refactoring

This commit is contained in:
Blaise Tine
2021-03-08 03:44:08 -08:00
parent e4cdefc3b0
commit 8eac091fb5
21 changed files with 2425 additions and 2348 deletions

View File

@@ -12,8 +12,8 @@ CXXFLAGS += -DDUMP_PERF_STATS
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
CXXFLAGS += $(CONFIGS)
@@ -21,11 +21,11 @@ LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread
SRCS = vortex.cpp ../common/vx_utils.cpp
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/pipeline.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
# Debugging
ifdef DEBUG
CXXFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += $(DBG_FLAGS) -DUSE_DEBUG=3
else
CXXFLAGS += -DNDEBUG
endif

View File

@@ -70,6 +70,7 @@ public:
, is_running_(false)
, thread_(__thread_proc__, this)
, ram_((1<<12), (1<<20)) {
mem_allocation_ = ALLOC_BASE_ADDR;
mmu_.attach(ram_, 0, 0xffffffff);
for (int i = 0; i < arch_.num_cores(); ++i) {
@@ -100,12 +101,13 @@ public:
if (dest_addr + asize > ram_.size())
return -1;
ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
for (int i = 0; i < size; i += 4) {
printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
}*/
ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
return 0;
}
@@ -127,7 +129,10 @@ public:
int start() {
mutex_.lock();
is_running_ = true;
for (int i = 0; i < arch_.num_cores(); ++i) {
cores_[i]->clear();
}
is_running_ = true;
mutex_.unlock();
return 0;
@@ -162,14 +167,12 @@ private:
void run() {
bool running;
int num_cores = cores_.at(0)->arch().num_cores();
do {
running = false;
for (int i = 0; i < num_cores; ++i) {
if (!cores_[i]->running())
continue;
running = true;
cores_[i]->step();
for (auto& core : cores_) {
core->step();
if (core->running())
running = true;
}
} while (running);
}

View File

@@ -13,7 +13,7 @@ RTL_DIR = ../hw/rtl
PROJECT = simX
SRCS = util.cpp args.cpp mem.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp
SRCS = util.cpp args.cpp mem.cpp pipeline.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp
# Debugging
ifdef DEBUG

View File

@@ -10,119 +10,77 @@
#include "core.h"
#include "debug.h"
#define INIT_TRACE(trace_inst) \
trace_inst.valid = false; \
trace_inst.PC = 0; \
trace_inst.wid = schedule_w_; \
trace_inst.irs1 = -1; \
trace_inst.irs2 = -1; \
trace_inst.frs1 = -1; \
trace_inst.frs2 = -1; \
trace_inst.frs3 = -1; \
trace_inst.frd = -1; \
trace_inst.ird = -1; \
trace_inst.vrs1 = -1; \
trace_inst.vrs2 = -1; \
trace_inst.vrd = -1; \
trace_inst.is_lw = false; \
trace_inst.is_sw = false; \
if (trace_inst.mem_addresses != NULL) \
free(trace_inst.mem_addresses); \
trace_inst.mem_addresses = (unsigned *)malloc(32 * sizeof(unsigned)); \
for (int tid = 0; tid < arch_.num_threads(); tid++) \
trace_inst.mem_addresses[tid] = 0xdeadbeef; \
trace_inst.mem_stall_cycles = 0; \
trace_inst.fetch_stall_cycles = 0; \
trace_inst.stall_warp = false; \
trace_inst.wspawn = false; \
trace_inst.stalled = false;
#define CPY_TRACE(drain, source) \
drain.valid = source.valid; \
drain.PC = source.PC; \
drain.wid = source.wid; \
drain.irs1 = source.irs1; \
drain.irs2 = source.irs2; \
drain.ird = source.ird; \
drain.frs1 = source.frs1; \
drain.frs2 = source.frs2; \
drain.frs3 = source.frs3; \
drain.frd = source.frd; \
drain.vrs1 = source.vrs1; \
drain.vrs2 = source.vrs2; \
drain.vrd = source.vrd; \
drain.is_lw = source.is_lw; \
drain.is_sw = source.is_sw; \
for (int tid = 0; tid < arch_.num_threads(); tid++) \
drain.mem_addresses[tid] = source.mem_addresses[tid]; \
drain.mem_stall_cycles = source.mem_stall_cycles; \
drain.fetch_stall_cycles = source.fetch_stall_cycles; \
drain.stall_warp = source.stall_warp; \
drain.wspawn = source.wspawn; \
drain.stalled = false;
using namespace vortex;
void printTrace(trace_inst_t *trace, const char *stage_name) {
__unused(trace, stage_name);
D(4, stage_name << ": valid=" << trace->valid);
D(4, stage_name << ": PC=" << std::hex << trace->PC << std::dec);
D(4, stage_name << ": wid=" << trace->wid);
D(4, stage_name << ": rd=" << trace->ird << ", rs1=" << trace->irs1 << ", trs2=" << trace->irs2);
D(4, stage_name << ": is_lw=" << trace->is_lw);
D(4, stage_name << ": is_sw=" << trace->is_sw);
D(4, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles);
D(4, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles);
D(4, stage_name << ": stall_warp=" << trace->stall_warp);
D(4, stage_name << ": wspawn=" << trace->wspawn);
D(4, stage_name << ": stalled=" << trace->stalled);
}
Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
: id_(id)
, arch_(arch)
, decoder_(decoder)
, mem_(mem)
, shared_mem_(1, SMEM_SIZE)
, steps_(0)
, num_insts_(0) {
, inst_in_schedule_("schedule")
, inst_in_fetch_("fetch")
, inst_in_decode_("decode")
, inst_in_issue_("issue")
, inst_in_execute_("execute")
, inst_in_writeback_("writeback") {
in_use_iregs_.resize(arch.num_warps(), 0);
in_use_fregs_.resize(arch.num_warps(), 0);
in_use_vregs_.reset();
foundSchedule_ = true;
schedule_w_ = 0;
csrs_.resize(arch_.num_csrs(), 0);
memset(&inst_in_fetch_, 0, sizeof(inst_in_fetch_));
memset(&inst_in_decode_, 0, sizeof(inst_in_decode_));
memset(&inst_in_scheduler_, 0, sizeof(inst_in_scheduler_));
memset(&inst_in_exe_, 0, sizeof(inst_in_exe_));
memset(&inst_in_lsu_, 0, sizeof(inst_in_lsu_));
memset(&inst_in_wb_, 0, sizeof(inst_in_wb_));
INIT_TRACE(inst_in_fetch_);
INIT_TRACE(inst_in_decode_);
INIT_TRACE(inst_in_scheduler_);
INIT_TRACE(inst_in_exe_);
INIT_TRACE(inst_in_lsu_);
INIT_TRACE(inst_in_wb_);
iRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
fRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
vRenameTable_.resize(arch.num_regs(), false);
csrs_.resize(arch_.num_csrs());
fcsrs_.resize(arch_.num_warps(), 0);
barriers_.resize(arch_.num_barriers(), 0);
stalled_warps_.resize(arch.num_warps(), false);
warps_.resize(arch_.num_warps());
for (int i = 0; i < arch_.num_warps(); ++i) {
warps_.emplace_back(this, i);
warps_[i] = std::make_shared<Warp>(this, i);
}
warps_[0].setTmask(0, true);
this->clear();
}
Core::~Core() {
//--
void Core::clear() {
for (int w = 0; w < arch_.num_warps(); ++w) {
in_use_iregs_[w].reset();
in_use_fregs_[w].reset();
}
stalled_warps_.reset();
in_use_vregs_.reset();
for (auto& csr : csrs_) {
csr = 0;
}
for (auto& fcsr : fcsrs_) {
fcsr = 0;
}
for (auto& barrier : barriers_) {
barrier.reset();
}
for (auto warp : warps_) {
warp->clear();
}
inst_in_schedule_.clear();
inst_in_fetch_.clear();
inst_in_decode_.clear();
inst_in_issue_.clear();
inst_in_execute_.clear();
inst_in_writeback_.clear();
steps_ = 0;
insts_ = 0;
loads_ = 0;
stores_ = 0;
inst_in_schedule_.valid = true;
warps_[0]->setTmask(0, true);
}
void Core::step() {
@@ -138,243 +96,152 @@ void Core::step() {
DPN(3, "\n");
this->writeback();
this->load_store();
this->execute_unit();
this->scheduler();
this->execute();
this->issue();
this->decode();
this->fetch();
this->schedule();
DPN(3, std::flush);
}
void Core::warpScheduler() {
foundSchedule_ = false;
int next_warp = schedule_w_;
void Core::schedule() {
if (!inst_in_schedule_.enter(&inst_in_fetch_))
return;
bool foundSchedule = false;
int scheduled_warp = inst_in_schedule_.wid;
for (size_t wid = 0; wid < warps_.size(); ++wid) {
// round robin scheduling
next_warp = (next_warp + 1) % warps_.size();
bool is_active = warps_[next_warp].active();
bool stalled = stalled_warps_[next_warp];
scheduled_warp = (scheduled_warp + 1) % warps_.size();
bool is_active = warps_[scheduled_warp]->active();
bool stalled = stalled_warps_[scheduled_warp];
if (is_active && !stalled) {
foundSchedule_ = true;
foundSchedule = true;
break;
}
}
schedule_w_ = next_warp;
if (!foundSchedule)
return;
D(3, "Schedule: wid=" << scheduled_warp);
inst_in_schedule_.wid = scheduled_warp;
// advance pipeline
inst_in_schedule_.next(&inst_in_fetch_);
}
void Core::fetch() {
if ((!inst_in_scheduler_.stalled)
&& (inst_in_fetch_.fetch_stall_cycles == 0)) {
INIT_TRACE(inst_in_fetch_);
if (!inst_in_fetch_.enter(&inst_in_issue_))
return;
if (foundSchedule_) {
auto active_threads_b = warps_[schedule_w_].getActiveThreads();
num_insts_ = num_insts_ + warps_[schedule_w_].getActiveThreads();
int wid = inst_in_fetch_.wid;
auto active_threads_b = warps_[wid]->getActiveThreads();
warps_[wid]->step(&inst_in_fetch_);
auto active_threads_a = warps_[wid]->getActiveThreads();
warps_[schedule_w_].step(&inst_in_fetch_);
auto active_threads_a = warps_[schedule_w_].getActiveThreads();
if (active_threads_b != active_threads_a) {
D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a);
}
this->getCacheDelays(&inst_in_fetch_);
if (inst_in_fetch_.stall_warp) {
stalled_warps_[inst_in_fetch_.wid] = true;
}
}
this->warpScheduler();
} else {
inst_in_fetch_.stalled = false;
if (inst_in_fetch_.fetch_stall_cycles > 0)
--inst_in_fetch_.fetch_stall_cycles;
insts_ += active_threads_b;
if (active_threads_b != active_threads_a) {
D(3, "** warp #" << wid << " active threads changed from " << active_threads_b << " to " << active_threads_a);
}
printTrace(&inst_in_fetch_, "Fetch");
if (inst_in_fetch_.stall_warp) {
D(3, "** warp #" << wid << " stalled");
stalled_warps_[wid] = true;
}
D(4, inst_in_fetch_);
// advance pipeline
inst_in_fetch_.next(&inst_in_issue_);
}
void Core::decode() {
if ((inst_in_fetch_.fetch_stall_cycles == 0)
&& !inst_in_scheduler_.stalled) {
CPY_TRACE(inst_in_decode_, inst_in_fetch_);
INIT_TRACE(inst_in_fetch_);
}
}
void Core::scheduler() {
if (!inst_in_scheduler_.stalled) {
CPY_TRACE(inst_in_scheduler_, inst_in_decode_);
INIT_TRACE(inst_in_decode_);
}
}
void Core::load_store() {
if ((inst_in_lsu_.mem_stall_cycles > 0) || inst_in_lsu_.stalled) {
// LSU currently busy
if ((inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)) {
inst_in_scheduler_.stalled = true;
}
} else {
if (!inst_in_scheduler_.is_lw && !inst_in_scheduler_.is_sw)
return;
// Scheduler has LSU inst
bool scheduler_srcs_busy = false;
if (inst_in_scheduler_.irs1 > 0) {
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
}
if (inst_in_scheduler_.irs2 > 0) {
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
}
if (inst_in_scheduler_.frs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
}
if (inst_in_scheduler_.frs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
}
if (inst_in_scheduler_.frs3 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
}
if (inst_in_scheduler_.vrs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
}
if (inst_in_scheduler_.vrs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
}
if (scheduler_srcs_busy) {
inst_in_scheduler_.stalled = true;
} else {
if (inst_in_scheduler_.ird > 0)
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
if (inst_in_scheduler_.frd >= 0)
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
if (inst_in_scheduler_.vrd >= 0)
vRenameTable_[inst_in_scheduler_.vrd] = true;
CPY_TRACE(inst_in_lsu_, inst_in_scheduler_);
INIT_TRACE(inst_in_scheduler_);
}
}
if (inst_in_lsu_.mem_stall_cycles > 0)
inst_in_lsu_.mem_stall_cycles--;
}
void Core::execute_unit() {
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)
if (!inst_in_decode_.enter(&inst_in_issue_))
return;
bool scheduler_srcs_busy = false;
// advance pipeline
inst_in_decode_.next(&inst_in_issue_);
}
if (inst_in_scheduler_.irs1 > 0) {
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
void Core::issue() {
if (!inst_in_issue_.enter(&inst_in_execute_))
return;
bool in_use_regs = (inst_in_issue_.used_iregs & in_use_iregs_[inst_in_issue_.wid]) != 0
|| (inst_in_issue_.used_fregs & in_use_fregs_[inst_in_issue_.wid]) != 0
|| (inst_in_issue_.used_vregs & in_use_vregs_) != 0;
if (in_use_regs) {
D(3, "Issue: registers not ready!");
inst_in_issue_.stalled = true;
return;
}
switch (inst_in_issue_.rdest_type) {
case 1:
if (inst_in_issue_.rdest)
in_use_iregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1;
break;
case 2:
in_use_fregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1;
break;
case 3:
in_use_vregs_[inst_in_issue_.rdest] = 1;
break;
default:
break;
}
if (inst_in_scheduler_.irs2 > 0) {
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
}
// advance pipeline
inst_in_issue_.next(&inst_in_execute_);
}
if (inst_in_scheduler_.frs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
}
void Core::execute() {
if (!inst_in_execute_.enter(&inst_in_writeback_))
return;
if (inst_in_scheduler_.frs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
}
if (inst_in_scheduler_.frs3 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
}
if (inst_in_scheduler_.vrs1 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
}
if (inst_in_scheduler_.vrs2 >= 0) {
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
}
if (scheduler_srcs_busy) {
D(3, "Execute: srcs not ready!");
inst_in_scheduler_.stalled = true;
} else {
if (inst_in_scheduler_.ird > 0) {
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
}
if (inst_in_scheduler_.frd >= 0) {
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
}
if (inst_in_scheduler_.vrd >= 0) {
vRenameTable_[inst_in_scheduler_.vrd] = true;
}
CPY_TRACE(inst_in_exe_, inst_in_scheduler_);
INIT_TRACE(inst_in_scheduler_);
}
// advance pipeline
inst_in_execute_.next(&inst_in_writeback_);
}
void Core::writeback() {
if (inst_in_wb_.ird > 0) {
iRenameTable_[inst_in_wb_.wid][inst_in_wb_.ird] = false;
if (!inst_in_writeback_.enter(NULL))
return;
switch (inst_in_writeback_.rdest_type) {
case 1:
in_use_iregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
break;
case 2:
in_use_fregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
break;
case 3:
in_use_vregs_[inst_in_writeback_.rdest] = 0;
break;
default:
break;
}
if (inst_in_wb_.frd >= 0) {
fRenameTable_[inst_in_wb_.wid][inst_in_wb_.frd] = false;
if (inst_in_writeback_.stall_warp) {
stalled_warps_[inst_in_writeback_.wid] = 0;
}
if (inst_in_wb_.vrd >= 0) {
vRenameTable_[inst_in_wb_.vrd] = false;
}
if (inst_in_wb_.stall_warp) {
stalled_warps_[inst_in_wb_.wid] = false;
}
INIT_TRACE(inst_in_wb_);
bool serviced_exe = false;
if ((inst_in_exe_.ird > 0)
|| (inst_in_exe_.frd >= 0)
|| (inst_in_exe_.vrd >= 0)
|| (inst_in_exe_.stall_warp)) {
CPY_TRACE(inst_in_wb_, inst_in_exe_);
INIT_TRACE(inst_in_exe_);
serviced_exe = true;
}
if (inst_in_lsu_.is_sw) {
INIT_TRACE(inst_in_lsu_);
} else {
if (((inst_in_lsu_.ird > 0)
|| (inst_in_lsu_.frd >= 0)
|| (inst_in_lsu_.vrd >= 0))
&& (inst_in_lsu_.mem_stall_cycles == 0)) {
if (serviced_exe) {
// Stalling LSU because EXE is busy
inst_in_lsu_.stalled = true;
} else {
CPY_TRACE(inst_in_wb_, inst_in_lsu_);
INIT_TRACE(inst_in_lsu_);
}
}
}
// advance pipeline
inst_in_writeback_.next(NULL);
}
Word Core::get_csr(Addr addr, int tid, int wid) {
if (addr == CSR_WTID) {
if (addr == CSR_FFLAGS) {
return fcsrs_.at(wid) & 0x1F;
} else if (addr == CSR_FRM) {
return (fcsrs_.at(wid) >> 5);
} else if (addr == CSR_FCSR) {
return fcsrs_.at(wid);
} else if (addr == CSR_WTID) {
// Warp threadID
return tid;
} else if (addr == CSR_LTID) {
@@ -404,10 +271,10 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
return arch_.num_cores();
} else if (addr == CSR_INSTRET) {
// NumInsts
return num_insts_;
return insts_;
} else if (addr == CSR_INSTRET_H) {
// NumInsts
return (Word)(num_insts_ >> 32);
return (Word)(insts_ >> 32);
} else if (addr == CSR_CYCLE) {
// NumCycles
return (Word)steps_;
@@ -419,8 +286,16 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
}
}
void Core::set_csr(Addr addr, Word value) {
csrs_.at(addr) = value;
// Writes `value` to the CSR selected by `addr`.
//
// The three floating-point CSR addresses are views onto the per-warp byte
// fcsrs_[wid]; every other address is stored in csrs_[addr]. The thread id
// argument is accepted but unused. Both containers are accessed with at().
void Core::set_csr(Addr addr, Word value, int /*tid*/, int wid) {
  if (addr == CSR_FFLAGS) {
    // Replace bits 4..0 only; keep the remaining bits.
    auto& fcsr = fcsrs_.at(wid);
    fcsr = (fcsr & ~0x1F) | (value & 0x1F);
    return;
  }

  if (addr == CSR_FRM) {
    // Replace bits 7..5 only; keep the low five bits.
    auto& fcsr = fcsrs_.at(wid);
    fcsr = (fcsr & ~0xE0) | (value << 5);
    return;
  }

  if (addr == CSR_FCSR) {
    // Whole-register write, limited to the low eight bits.
    fcsrs_.at(wid) = value & 0xff;
    return;
  }

  // Any other CSR lives in the general CSR file.
  csrs_.at(addr) = value;
}
void Core::barrier(int bar_id, int count, int warp_id) {
@@ -430,7 +305,7 @@ void Core::barrier(int bar_id, int count, int warp_id) {
return;
for (int i = 0; i < arch_.num_warps(); ++i) {
if (barrier.test(i)) {
warps_.at(i).activate();
warps_.at(i)->activate();
}
}
barrier.reset();
@@ -441,6 +316,7 @@ Word Core::icache_fetch(Addr addr, bool sup) {
}
Word Core::dcache_read(Addr addr, bool sup) {
++loads_;
#ifdef SM_ENABLE
if ((addr >= (SHARED_MEM_BASE_ADDR - SMEM_SIZE))
&& ((addr + 4) <= SHARED_MEM_BASE_ADDR)) {
@@ -451,6 +327,7 @@ Word Core::dcache_read(Addr addr, bool sup) {
}
void Core::dcache_write(Addr addr, Word data, bool sup, Size size) {
++stores_;
#ifdef SM_ENABLE
if ((addr >= (SHARED_MEM_BASE_ADDR - SMEM_SIZE))
&& ((addr + 4) <= SHARED_MEM_BASE_ADDR)) {
@@ -461,36 +338,17 @@ void Core::dcache_write(Addr addr, Word data, bool sup, Size size) {
mem_.write(addr, data, sup, size);
}
void Core::getCacheDelays(trace_inst_t *trace_inst) {
trace_inst->fetch_stall_cycles += 1;
if (trace_inst->is_sw || trace_inst->is_lw) {
trace_inst->mem_stall_cycles += 3;
}
}
bool Core::running() const {
bool stages_have_valid = inst_in_fetch_.valid
|| inst_in_decode_.valid
|| inst_in_scheduler_.valid
|| inst_in_lsu_.valid
|| inst_in_exe_.valid
|| inst_in_wb_.valid;
if (stages_have_valid)
return true;
for (unsigned i = 0; i < warps_.size(); ++i) {
if (warps_[i].active()) {
return true;
}
}
return false;
return inst_in_fetch_.valid
|| inst_in_decode_.valid
|| inst_in_issue_.valid
|| inst_in_execute_.valid
|| inst_in_writeback_.valid;
}
void Core::printStats() const {
std::cout << "Total steps: " << steps_ << std::endl;
for (unsigned i = 0; i < warps_.size(); ++i) {
std::cout << "=== Warp " << i << " ===" << std::endl;
warps_[i].printStats();
}
std::cout << "Steps : " << steps_ << std::endl
<< "Insts : " << insts_ << std::endl
<< "Loads : " << loads_ << std::endl
<< "Stores: " << stores_ << std::endl;
}

View File

@@ -5,6 +5,7 @@
#include <list>
#include <stack>
#include <unordered_map>
#include <memory>
#include <set>
#include "debug.h"
@@ -13,14 +14,15 @@
#include "decode.h"
#include "mem.h"
#include "warp.h"
#include "trace.h"
#include "pipeline.h"
namespace vortex {
class Core {
public:
Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id = 0);
~Core();
Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id);
void clear();
bool running() const;
@@ -33,7 +35,7 @@ public:
}
Warp& warp(int i) {
return warps_[i];
return *warps_.at(i);
}
Decoder& decoder() {
@@ -42,23 +44,23 @@ public:
const ArchDef& arch() const {
return arch_;
}
Word interruptEntry() const {
return interruptEntry_;
}
unsigned long num_insts() const {
return num_insts_;
return insts_;
}
unsigned long num_steps() const {
return steps_;
}
Word getIRegValue(int reg) const {
return warps_[0]->getIRegValue(reg);
}
Word get_csr(Addr addr, int tid, int wid);
void set_csr(Addr addr, Word value);
void set_csr(Addr addr, Word value, int tid, int wid);
void barrier(int bar_id, int count, int warp_id);
@@ -70,20 +72,22 @@ public:
private:
void schedule();
void fetch();
void decode();
void scheduler();
void execute_unit();
void load_store();
void issue();
void execute();
void writeback();
void getCacheDelays(trace_inst_t *);
void warpScheduler();
std::vector<std::vector<bool>> iRenameTable_;
std::vector<std::vector<bool>> fRenameTable_;
std::vector<bool> vRenameTable_;
std::vector<bool> stalled_warps_;
std::vector<RegMask> in_use_iregs_;
std::vector<RegMask> in_use_fregs_;
RegMask in_use_vregs_;
WarpMask stalled_warps_;
std::vector<std::shared_ptr<Warp>> warps_;
std::vector<WarpMask> barriers_;
std::vector<Word> csrs_;
std::vector<Byte> fcsrs_;
Word id_;
const ArchDef &arch_;
@@ -91,22 +95,19 @@ private:
MemoryUnit &mem_;
#ifdef SM_ENABLE
RAM shared_mem_;
#endif
std::vector<Warp> warps_;
std::vector<WarpMask> barriers_;
std::vector<Word> csrs_;
int schedule_w_;
uint64_t steps_;
uint64_t num_insts_;
Word interruptEntry_;
bool foundSchedule_;
#endif
trace_inst_t inst_in_fetch_;
trace_inst_t inst_in_decode_;
trace_inst_t inst_in_scheduler_;
trace_inst_t inst_in_exe_;
trace_inst_t inst_in_lsu_;
trace_inst_t inst_in_wb_;
Pipeline inst_in_schedule_;
Pipeline inst_in_fetch_;
Pipeline inst_in_decode_;
Pipeline inst_in_issue_;
Pipeline inst_in_execute_;
Pipeline inst_in_writeback_;
uint64_t steps_;
uint64_t insts_;
uint64_t loads_;
uint64_t stores_;
};
} // namespace vortex

View File

@@ -11,47 +11,215 @@
#include "decode.h"
#include "archdef.h"
#include "instr.h"
#include "trace.h"
using namespace vortex;
struct InstTableEntry_t {
const char *opString;
bool controlFlow;
InstType iType;
};
static const std::unordered_map<int, struct InstTableEntry_t> sc_instTable = {
{Opcode::NOP, {"nop" , false, InstType::N_TYPE}},
{Opcode::R_INST, {"r_type", false, InstType::R_TYPE}},
{Opcode::L_INST, {"load" , false, InstType::I_TYPE}},
{Opcode::I_INST, {"i_type", false, InstType::I_TYPE}},
{Opcode::S_INST, {"store" , false, InstType::S_TYPE}},
{Opcode::B_INST, {"branch", true , InstType::B_TYPE}},
{Opcode::LUI_INST, {"lui" , false, InstType::U_TYPE}},
{Opcode::AUIPC_INST, {"auipc" , false, InstType::U_TYPE}},
{Opcode::JAL_INST, {"jal" , true , InstType::J_TYPE}},
{Opcode::JALR_INST, {"jalr" , true , InstType::I_TYPE}},
{Opcode::SYS_INST, {"SYS" , true , InstType::I_TYPE}},
{Opcode::FENCE, {"fence" , true , InstType::I_TYPE}},
{Opcode::PJ_INST, {"pred j", true , InstType::R_TYPE}},
{Opcode::GPGPU, {"gpgpu" , false, InstType::R_TYPE}},
{Opcode::VSET_ARITH, {"vsetvl", false, InstType::V_TYPE}},
{Opcode::VL, {"vl" , false, InstType::V_TYPE}},
{Opcode::VS, {"vs" , false, InstType::V_TYPE}},
{Opcode::FL, {"fl" , false, InstType::I_TYPE }},
{Opcode::FS, {"fs" , false, InstType::S_TYPE }},
{Opcode::FCI, {"fci" , false, InstType::R_TYPE }},
{Opcode::FMADD, {"fma" , false, InstType::R4_TYPE }},
{Opcode::FMSUB, {"fms" , false, InstType::R4_TYPE }},
{Opcode::FMNMADD, {"fmnma" , false, InstType::R4_TYPE }},
{Opcode::FMNMSUB, {"fmnms" , false, InstType::R4_TYPE }}
{Opcode::NOP, {false, InstType::N_TYPE}},
{Opcode::R_INST, {false, InstType::R_TYPE}},
{Opcode::L_INST, {false, InstType::I_TYPE}},
{Opcode::I_INST, {false, InstType::I_TYPE}},
{Opcode::S_INST, {false, InstType::S_TYPE}},
{Opcode::B_INST, {true , InstType::B_TYPE}},
{Opcode::LUI_INST, {false, InstType::U_TYPE}},
{Opcode::AUIPC_INST, {false, InstType::U_TYPE}},
{Opcode::JAL_INST, {true , InstType::J_TYPE}},
{Opcode::JALR_INST, {true , InstType::I_TYPE}},
{Opcode::SYS_INST, {true , InstType::I_TYPE}},
{Opcode::FENCE, {true , InstType::I_TYPE}},
{Opcode::FL, {false, InstType::I_TYPE}},
{Opcode::FS, {false, InstType::S_TYPE}},
{Opcode::FCI, {false, InstType::R_TYPE}},
{Opcode::FMADD, {false, InstType::R4_TYPE}},
{Opcode::FMSUB, {false, InstType::R4_TYPE}},
{Opcode::FMNMADD, {false, InstType::R4_TYPE}},
{Opcode::FMNMSUB, {false, InstType::R4_TYPE}},
{Opcode::VSET, {false, InstType::V_TYPE}},
{Opcode::GPGPU, {false, InstType::R_TYPE}},
};
std::ostream &vortex::operator<<(std::ostream &os, Instr &instr) {
os << std::dec << sc_instTable.at(instr.opcode_).opString;
// Returns the mnemonic used when printing `instr` in debug traces.
//
// The name is selected from the opcode and, where needed, from func3, func7,
// the second source register index (rs2) and the immediate. The returned
// pointer refers to a string literal. Any opcode / function-field combination
// that is not listed terminates the process via std::abort().
static const char* op_string(const Instr &instr) {
  Word func3 = instr.getFunc3();
  Word func7 = instr.getFunc7();
  Word rs2   = instr.getRSrc(1);
  Word imm   = instr.getImm();

  switch (instr.getOpcode()) {
  case Opcode::NOP:        return "NOP";
  case Opcode::LUI_INST:   return "LUI";
  case Opcode::AUIPC_INST: return "AUIPC";
  case Opcode::R_INST:
    if (func7 & 0x1) {
      // func7 bit 0 set: multiply / divide group.
      switch (func3) {
      case 0: return "MUL";
      case 1: return "MULH";
      case 2: return "MULHSU";
      case 3: return "MULHU";
      case 4: return "DIV";
      case 5: return "DIVU";
      case 6: return "REM";
      case 7: return "REMU";
      default:
        std::abort();
      }
    } else {
      switch (func3) {
      case 0: return func7 ? "SUB" : "ADD";
      case 1: return "SLL";
      case 2: return "SLT";
      case 3: return "SLTU";
      case 4: return "XOR";
      case 5: return func7 ? "SRA" : "SRL";
      case 6: return "OR";
      case 7: return "AND";
      default:
        std::abort();
      }
    }
  case Opcode::I_INST:
    switch (func3) {
    // For ADDI the func7 bit positions are part of the immediate, so they
    // must not change the mnemonic (RISC-V has no SUBI).
    case 0: return "ADDI";
    case 1: return "SLLI";
    case 2: return "SLTI";
    case 3: return "SLTIU";
    case 4: return "XORI";
    case 5: return func7 ? "SRAI" : "SRLI";
    case 6: return "ORI";
    case 7: return "ANDI";
    default:
      std::abort();
    }
  case Opcode::B_INST:
    switch (func3) {
    case 0: return "BEQ";
    case 1: return "BNE";
    case 4: return "BLT";
    case 5: return "BGE";
    case 6: return "BLTU";
    case 7: return "BGEU";
    default:
      std::abort();
    }
  case Opcode::JAL_INST:  return "JAL";
  case Opcode::JALR_INST: return "JALR";
  case Opcode::L_INST:
    switch (func3) {
    case 0: return "LB";
    case 1: return "LH";
    case 2: return "LW";
    case 4: return "LBU";
    case 5: return "LHU";
    default:
      std::abort();
    }
  case Opcode::S_INST:
    switch (func3) {
    case 0: return "SB";
    case 1: return "SH";
    case 2: return "SW";
    default:
      std::abort();
    }
  case Opcode::SYS_INST:
    switch (func3) {
    case 0: return imm ? "EBREAK" : "ECALL";
    case 1: return "CSRRW";
    case 2: return "CSRRS";
    case 3: return "CSRRC";
    case 5: return "CSRRWI";
    case 6: return "CSRRSI";
    case 7: return "CSRRCI";
    default:
      std::abort();
    }
  case Opcode::FENCE: return "FENCE";
  // func3 == 2 selects the floating-point form; anything else is reported
  // as the vector load / store.
  case Opcode::FL: return (func3 == 0x2) ? "FL" : "VL";
  case Opcode::FS: return (func3 == 0x2) ? "FS" : "VS";
  case Opcode::FCI:
    switch (func7) {
    case 0x00: return "FADD";
    case 0x04: return "FSUB";
    case 0x08: return "FMUL";
    case 0x0c: return "FDIV";
    case 0x2c: return "FSQRT";
    case 0x10:
      switch (func3) {
      case 0: return "FSGNJ";
      case 1: return "FSGNJN";
      case 2: return "FSGNJX";
      default:
        std::abort();
      }
    case 0x14:
      switch (func3) {
      case 0: return "FMIN";
      case 1: return "FMAX";
      default:
        std::abort();
      }
    case 0x50:
      switch (func3) {
      case 0: return "FLE";
      case 1: return "FLT";
      case 2: return "FEQ";
      default:
        std::abort();
      }
    // For the conversions, a non-zero rs2 field selects the unsigned variant.
    case 0x60: return rs2 ? "FCVT.WU" : "FCVT.W";
    case 0x68: return rs2 ? "FCVT.S.WU" : "FCVT.S.W";
    case 0x70: return func3 ? "FCLASS" : "FMV.X.W";
    case 0x78: return "FMV.W";
    default:
      std::abort();
    }
  case Opcode::FMADD:   return "FMADD";
  case Opcode::FMSUB:   return "FMSUB";
  case Opcode::FMNMADD: return "FMNMADD";
  case Opcode::FMNMSUB: return "FMNMSUB";
  case Opcode::VSET:    return "VSET";
  case Opcode::GPGPU:
    switch (func3) {
    case 0: return "TMC";
    case 1: return "WSPAWN";
    case 2: return "SPLIT";
    case 3: return "JOIN";
    case 4: return "BAR";
    default:
      std::abort();
    }
  default:
    std::abort();
  }
}
namespace vortex {
std::ostream &operator<<(std::ostream &os, const Instr &instr) {
os << op_string(instr) << ": ";
int rdt = instr.getRDType();
int rd = instr.getRDest();
switch (rdt) {
case 1: os << "r" << std::dec << rd << " <- "; break;
case 2: os << "fr" << std::dec << rd << " <- "; break;
case 3: os << "vr" << std::dec << rd << " <- "; break;
default: break;
}
int i = 0;
for (; i < instr.getNRSrc(); ++i) {
int rst = instr.getRSType(i);
int rs = instr.getRSrc(i);
if (i) os << ", ";
switch (rst) {
case 1: os << "r" << std::dec << rs; break;
case 2: os << "fr" << std::dec << rs; break;
case 3: os << "vr" << std::dec << rs; break;
default: break;
}
}
if (instr.hasImm()) {
if (i) os << ", ";
os << "imm=0x" << std::hex << instr.getImm();
}
return os;
}
}
Decoder::Decoder(const ArchDef &arch) {
inst_s_ = arch.wsize() * 8;
@@ -63,22 +231,16 @@ Decoder::Decoder(const ArchDef &arch) {
vmask_s_ = 1;
shift_opcode_ = 0;
shift_rd_ = opcode_s_;
shift_func3_ = opcode_s_ + reg_s_;
shift_rs1_ = opcode_s_ + reg_s_ + func3_s_;
shift_rs2_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_;
shift_func7_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_;
shift_func2_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_;
shift_rs3_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + func2_s_;
shift_j_u_immed_ = opcode_s_ + reg_s_;
shift_s_b_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_;
shift_i_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_;
shift_vset_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_;
shift_vmask_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_;
shift_vmop_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + vmask_s_;
shift_vnf_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + vmask_s_ + mop_s_;
shift_func6_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + 1;
shift_vset_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + 6;
shift_rd_ = opcode_s_;
shift_func3_ = shift_rd_ + reg_s_;
shift_rs1_ = shift_func3_ + func3_s_;
shift_rs2_ = shift_rs1_ + reg_s_;
shift_func7_ = shift_rs2_ + reg_s_;
shift_rs3_ = shift_func7_ + func2_s_;
shift_vmop_ = shift_func7_ + vmask_s_;
shift_vnf_ = shift_vmop_ + mop_s_;
shift_func6_ = shift_func7_ + 1;
shift_vset_ = shift_func7_ + 6;
reg_mask_ = 0x1f;
func2_mask_ = 0x2;
@@ -94,223 +256,174 @@ Decoder::Decoder(const ArchDef &arch) {
v_imm_mask_ = 0x7ff;
}
std::shared_ptr<Instr> Decoder::decode(
const std::vector<Byte> &v,
Size &idx,
trace_inst_t *trace_inst)
{
Word code(readWord(v, idx, inst_s_ / 8));
// std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n";
std::shared_ptr<Instr> Decoder::decode(Word code) {
auto instr = std::make_shared<Instr>();
Opcode op = (Opcode)((code >> shift_opcode_) & opcode_mask_);
// std::cout << "opcode: " << op << "\n";
instr->setOpcode(op);
Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5,
bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3;
Word func3 = (code >> shift_func3_) & func3_mask_;
Word func6 = (code >> shift_func6_) & func6_mask_;
Word func7 = (code >> shift_func7_) & func7_mask_;
InstType curInstType = sc_instTable.at(op).iType;
int rd = (code >> shift_rd_) & reg_mask_;
int rs1 = (code >> shift_rs1_) & reg_mask_;
int rs2 = (code >> shift_rs2_) & reg_mask_;
int rs3 = (code >> shift_rs3_) & reg_mask_;
auto iType = sc_instTable.at(op).iType;
if (op == Opcode::FL || op == Opcode::FS) {
// need to find out whether it is vector or floating point inst
Word width_bits = (code >> shift_func3_) & func3_mask_;
if ((width_bits == 0x1) || (width_bits == 0x2)
|| (width_bits == 0x3) || (width_bits == 0x4)) {
curInstType = (op == Opcode::FL) ? InstType::I_TYPE : InstType::S_TYPE;
if (func3 != 0x2) {
iType = InstType::V_TYPE;
}
}
switch (curInstType) {
switch (iType) {
case InstType::N_TYPE:
break;
case InstType::R_TYPE:
if (op == Opcode::FCI) {
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
if (op == Opcode::FCI) {
instr->setSrcFReg(rs1);
instr->setSrcFReg(rs2);
switch (func7) {
case 0x50: // FLE, FLT, FEQ
case 0x60: // FCVT.WU, FCVT.W
case 0x70: // FLASS, FMV.X.W
instr->setDestReg(rd);
break;
default:
instr->setDestFReg(rd);
}
} else {
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
instr->setDestReg(rd);
instr->setSrcReg(rs1);
instr->setSrcReg(rs2);
}
instr->setFunc3((code >> shift_func3_) & func3_mask_);
instr->setFunc7((code >> shift_func7_) & func7_mask_);
break;
case InstType::I_TYPE:
if (op == Opcode::FCI || op == Opcode::FL) {
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
} else {
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
}
instr->setFunc7((code >> shift_func7_) & func7_mask_);
func3 = (code >> shift_func3_) & func3_mask_;
instr->setFunc3(func3);
instr->setFunc7(func7);
break;
case InstType::I_TYPE: {
if (op == Opcode::FCI || op == Opcode::FL) {
instr->setDestFReg(rd);
instr->setSrcFReg(rs1);
} else {
instr->setDestReg(rd);
instr->setSrcReg(rs1);
}
instr->setFunc3(func3);
instr->setFunc7(func7);
if ((func3 == 5) && (op != L_INST) && (op != Opcode::FL)) {
instr->setSrcImm(signExt(((code >> shift_rs2_) & reg_mask_), 5, reg_mask_));
instr->setImm(signExt(rs2, 5, reg_mask_));
} else {
instr->setSrcImm(signExt(code >> shift_i_immed_, 12, i_imm_mask_));
instr->setImm(signExt(code >> shift_rs2_, 12, i_imm_mask_));
}
break;
} break;
case InstType::S_TYPE:
case InstType::S_TYPE: {
if (op == Opcode::FS) {
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcFReg(rs1);
instr->setSrcFReg(rs2);
} else {
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcReg(rs1);
instr->setSrcReg(rs2);
}
instr->setFunc3((code >> shift_func3_) & func3_mask_);
dest_bits = (code >> shift_rd_) & reg_mask_;
imm_bits = (code >> shift_s_b_immed_ & func7_mask_);
imeed = (imm_bits << reg_s_) | dest_bits;
instr->setSrcImm(signExt(imeed, 12, s_imm_mask_));
break;
instr->setFunc3(func3);
Word imeed = (func7 << reg_s_) | rd;
instr->setImm(signExt(imeed, 12, s_imm_mask_));
} break;
case InstType::B_TYPE:
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
instr->setFunc3((code >> shift_func3_) & func3_mask_);
dest_bits = (code >> shift_rd_) & reg_mask_;
imm_bits = (code >> shift_s_b_immed_ & func7_mask_);
bit_11 = dest_bits & 0x1;
bits_4_1 = dest_bits >> 1;
bit_10_5 = imm_bits & 0x3f;
bit_12 = imm_bits >> 6;
imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
instr->setSrcImm(signExt(imeed, 13, b_imm_mask_));
break;
case InstType::B_TYPE: {
instr->setSrcReg(rs1);
instr->setSrcReg(rs2);
instr->setFunc3(func3);
Word bit_11 = rd & 0x1;
Word bits_4_1 = rd >> 1;
Word bit_10_5 = func7 & 0x3f;
Word bit_12 = func7 >> 6;
Word imeed = (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
instr->setImm(signExt(imeed, 13, b_imm_mask_));
} break;
case InstType::U_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_);
instr->setSrcImm(signExt(code >> shift_j_u_immed_, 20, u_imm_mask_));
instr->setDestReg(rd);
instr->setImm(signExt(code >> shift_func3_, 20, u_imm_mask_));
break;
case InstType::J_TYPE:
instr->setDestReg((code >> shift_rd_) & reg_mask_);
unordered = code >> shift_j_u_immed_;
bits_19_12 = unordered & 0xff;
bit_11 = (unordered >> 8) & 0x1;
bits_10_1 = (unordered >> 9) & 0x3ff;
bit_20 = (unordered >> 19) & 0x1;
imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
case InstType::J_TYPE: {
instr->setDestReg(rd);
Word unordered = code >> shift_func3_;
Word bits_19_12 = unordered & 0xff;
Word bit_11 = (unordered >> 8) & 0x1;
Word bits_10_1 = (unordered >> 9) & 0x3ff;
Word bit_20 = (unordered >> 19) & 0x1;
Word imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
if (bit_20) {
imeed |= ~j_imm_mask_;
}
instr->setSrcImm(imeed);
break;
instr->setImm(imeed);
} break;
case InstType::V_TYPE:
D(3, "Entered here: instr type = vector" << op);
switch (op) {
case Opcode::VSET_ARITH: //TODO: arithmetic ops
instr->setDestVReg((code >> shift_rd_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
func3 = (code >> shift_func3_) & func3_mask_;
case Opcode::VSET: {
instr->setDestVReg(rd);
instr->setSrcVReg(rs1);
instr->setFunc3(func3);
D(3, "Entered here: instr type = vector");
if (func3 == 7) {
D(3, "Entered here: imm instr");
instr->setVsetImm(!(code >> shift_vset_));
if (instr->getVsetImm()) {
instr->setImm(!(code >> shift_vset_));
if (instr->getImm()) {
Word immed = (code >> shift_rs2_) & v_imm_mask_;
D(3, "immed" << immed);
instr->setSrcImm(immed); //TODO
instr->setImm(immed);
instr->setVlmul(immed & 0x3);
D(3, "lmul " << (immed & 0x3));
instr->setVediv((immed >> 4) & 0x3);
D(3, "ediv " << ((immed >> 4) & 0x3));
instr->setVsew((immed >> 2) & 0x3);
D(3, "sew " << ((immed >> 2) & 0x3));
} else {
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcVReg(rs2);
}
} else {
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setVmask((code >> shift_vmask_) & 0x1);
instr->setFunc6((code >> shift_func6_) & func6_mask_);
instr->setSrcVReg(rs2);
instr->setVmask((code >> shift_func7_) & 0x1);
instr->setFunc6(func6);
}
break;
} break;
case Opcode::VL:
D(3, "vector load instr");
instr->setDestVReg((code >> shift_rd_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setVmask((code >> shift_vmask_));
instr->setDestVReg(rd);
instr->setSrcVReg(rs1);
instr->setVlsWidth(func3);
instr->setSrcVReg(rs2);
instr->setVmask(code >> shift_func7_);
instr->setVmop((code >> shift_vmop_) & func3_mask_);
instr->setVnf((code >> shift_vnf_) & func3_mask_);
break;
case Opcode::VS:
instr->setVs3((code >> shift_rd_) & reg_mask_);
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
instr->setVmask((code >> shift_vmask_));
instr->setVs3(rd);
instr->setSrcVReg(rs1);
instr->setVlsWidth(func3);
instr->setSrcVReg(rs2);
instr->setVmask(code >> shift_func7_);
instr->setVmop((code >> shift_vmop_) & func3_mask_);
instr->setVnf((code >> shift_vnf_) & func3_mask_);
break;
default:
std::cout << "Inavlid opcode.\n";
std::abort();
}
break;
case R4_TYPE:
// RT: add R4_TYPE decoder
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
instr->setSrcFReg((code >> shift_rs3_) & reg_mask_);
instr->setFunc3((code >> shift_func3_) & func3_mask_);
instr->setDestFReg(rd);
instr->setSrcFReg(rs1);
instr->setSrcFReg(rs2);
instr->setSrcFReg(rs3);
instr->setFunc3(func3);
break;
default:
std::cout << "Unrecognized argument class in word decoder.\n";
std::abort();
}
if (curInstType != InstType::N_TYPE) {
trace_inst->valid = true;
if (instr->hasRDest()) {
if (instr->is_FpDest()) {
trace_inst->frd = instr->getRDest();
} else if (instr->is_VDest()) {
trace_inst->vrd = instr->getRDest();
} else {
trace_inst->ird = instr->getRDest();
}
}
for (int i = 0; i < instr->getNRSrc(); ++i) {
if (instr->is_FpSrc(i)) {
if (i == 0) trace_inst->frs1 = instr->getRSrc(i);
else if (i == 1) trace_inst->frs2 = instr->getRSrc(i);
else if (i == 2) trace_inst->frs3 = instr->getRSrc(i);
else std::abort();
} else if (instr->is_VSrc(i)) {
if (i == 0) trace_inst->vrs1 = instr->getRSrc(i);
else if (i == 1) trace_inst->vrs2 = instr->getRSrc(i);
else std::abort();
} else {
if (i == 0) trace_inst->irs1 = instr->getRSrc(i);
else if (i == 1) trace_inst->irs2 = instr->getRSrc(i);
else std::abort();
}
}
}
D(2, "Decoded instr 0x" << std::hex << code << " into: " << *instr << std::flush);
D(2, "Instr 0x" << std::hex << code << ": " << *instr << std::flush);
return instr;
}

View File

@@ -2,19 +2,18 @@
#include <vector>
#include <memory>
#include "util.h"
namespace vortex {
class ArchDef;
class Instr;
class trace_inst_t;
class Pipeline;
class Decoder {
public:
Decoder(const ArchDef &);
virtual std::shared_ptr<Instr> decode(const std::vector<Byte> &v, Size &n, trace_inst_t * trace_inst);
std::shared_ptr<Instr> decode(Word code);
private:

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,6 @@
#pragma once
#include "types.h"
#include "trace.h"
namespace vortex {
@@ -20,12 +19,7 @@ enum Opcode {
JALR_INST = 0x67,
SYS_INST = 0x73,
FENCE = 0x0f,
PJ_INST = 0x7b,
GPGPU = 0x6b,
VSET_ARITH= 0x57,
VL = 0x7,
VS = 0x27,
// F-Extension
// F Extension
FL = 0x7,
FS = 0x27,
FCI = 0x53,
@@ -33,6 +27,12 @@ enum Opcode {
FMSUB = 0x47,
FMNMSUB = 0x4b,
FMNMADD = 0x4f,
// Vector Extension
VSET = 0x57,
VL = 0x7,
VS = 0x27,
// GPGPU Extension
GPGPU = 0x6b,
};
enum InstType {
@@ -51,34 +51,27 @@ class Instr {
public:
Instr()
: opcode_(Opcode::NOP)
, nRsrc_(0)
, hasImmSrc_(false)
, hasRDest_(false)
, is_iDest_(false)
, is_FpDest_(false)
, is_VDest_(false)
, is_FpSrc_(0)
, is_VSrc_(0)
, func2_(0)
, num_rsrcs_(0)
, has_imm_(false)
, rdest_(0)
, func3_(0)
, func7_(0)
{}
friend std::ostream &operator<<(std::ostream &, Instr &);
, func7_(0) {
for (int i = 0; i < MAX_REG_SOURCES; ++i) {
rsrc_type_[i] = 0;
}
}
/* Setters used to "craft" the instruction. */
void setOpcode(Opcode opcode) { opcode_ = opcode; }
void setDestReg(int destReg) { hasRDest_ = true; is_iDest_ = true; rdest_ = destReg; }
void setSrcReg(int srcReg) { rsrc_[nRsrc_++] = srcReg; }
void setDestFReg(int destReg) { hasRDest_ = true; is_FpDest_ = true; rdest_ = destReg; }
void setSrcFReg(int srcReg) { is_FpSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
void setDestVReg(int destReg) { hasRDest_ = true; is_VDest_ = true; rdest_ = destReg; }
void setSrcVReg(int srcReg) { is_VSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
void setDestReg(int destReg) { rdest_type_ = 1; rdest_ = destReg; }
void setSrcReg(int srcReg) { rsrc_type_[num_rsrcs_] = 1; rsrc_[num_rsrcs_++] = srcReg; }
void setDestFReg(int destReg) { rdest_type_ = 2; rdest_ = destReg; }
void setSrcFReg(int srcReg) { rsrc_type_[num_rsrcs_] = 2; rsrc_[num_rsrcs_++] = srcReg; }
void setDestVReg(int destReg) { rdest_type_ = 3; rdest_ = destReg; }
void setSrcVReg(int srcReg) { rsrc_type_[num_rsrcs_] = 3; rsrc_[num_rsrcs_++] = srcReg; }
void setFunc3(Word func3) { func3_ = func3; }
void setFunc7(Word func7) { func7_ = func7; }
void setSrcImm(Word srcImm) { hasImmSrc_ = true; immsrc_ = srcImm; }
void setVsetImm(Word vset_imm) { if (vset_imm) vsetImm_ = true; else vsetImm_ = false; }
void setImm(Word imm) { has_imm_ = true; imm_ = imm; }
void setVlsWidth(Word width) { vlsWidth_ = width; }
void setVmop(Word mop) { vMop_ = mop; }
void setVnf(Word nf) { vNf_ = nf; }
@@ -94,29 +87,22 @@ public:
Word getFunc3() const { return func3_; }
Word getFunc6() const { return func6_; }
Word getFunc7() const { return func7_; }
int getNRSrc() const { return nRsrc_; }
int getNRSrc() const { return num_rsrcs_; }
int getRSrc(int i) const { return rsrc_[i]; }
bool hasRDest() const { return hasRDest_; }
int getRDest() const { return rdest_; }
bool hasImm() const { return hasImmSrc_; }
Word getImm() const { return immsrc_; }
bool getVsetImm() const { return vsetImm_; }
int getRSType(int i) const { return rsrc_type_[i]; }
int getRDest() const { return rdest_; }
int getRDType() const { return rdest_type_; }
bool hasImm() const { return has_imm_; }
Word getImm() const { return imm_; }
Word getVlsWidth() const { return vlsWidth_; }
Word getVmop() const { return vMop_; }
Word getvNf() const { return vNf_; }
bool getVmask() const { return vmask_; }
Word getVmask() const { return vmask_; }
Word getVs3() const { return vs3_; }
Word getVlmul() const { return vlmul_; }
Word getVsew() const { return vsew_; }
Word getVediv() const { return vediv_; }
bool is_iDest() const { return is_iDest_; }
bool is_FpDest() const { return is_FpDest_; }
bool is_FpSrc(int i) const { return (is_FpSrc_ >> i) & 0x1; }
bool is_VDest() const { return is_VDest_; }
bool is_VSrc(int i) const { return (is_VSrc_ >> i) & 0x1; }
private:
enum {
@@ -124,24 +110,21 @@ private:
};
Opcode opcode_;
int nRsrc_;
bool hasImmSrc_;
bool hasRDest_;
bool is_iDest_;
bool is_FpDest_;
bool is_VDest_;
int is_FpSrc_;
int is_VSrc_;
Word immsrc_;
Word func2_;
int num_rsrcs_;
bool has_imm_;
int rdest_type_;
int isrc_mask_;
int fsrc_mask_;
int vsrc_mask_;
Word imm_;
int rsrc_type_[MAX_REG_SOURCES];
int rsrc_[MAX_REG_SOURCES];
int rdest_;
Word func3_;
Word func7_;
int rsrc_[MAX_REG_SOURCES];
int rdest_;
//Vector
bool vsetImm_;
bool vmask_;
Word vmask_;
Word vlsWidth_;
Word vMop_;
Word vNf_;
@@ -150,8 +133,8 @@ private:
Word vsew_;
Word vediv_;
Word func6_;
friend std::ostream &operator<<(std::ostream &, const Instr&);
};
std::ostream &operator<<(std::ostream &, Instr &);
}

View File

@@ -22,6 +22,7 @@ int main(int argc, char **argv) {
std::string imgFileName;
bool showHelp(false);
bool showStats(false);
bool riscv_test(false);
/* Read the command line arguments. */
CommandLineArgFlag fh("-h", "--help", "", showHelp);
@@ -30,6 +31,7 @@ int main(int argc, char **argv) {
CommandLineArgSetter<int> fc("-c", "--cores", "", num_cores);
CommandLineArgSetter<int> fw("-w", "--warps", "", num_warps);
CommandLineArgSetter<int> ft("-t", "--threads", "", num_threads);
CommandLineArgFlag fr("-r", "--riscv", "", riscv_test);
CommandLineArgFlag fs("-s", "--stats", "", showStats);
CommandLineArg::readArgs(argc - 1, argv + 1);
@@ -41,6 +43,7 @@ int main(int argc, char **argv) {
" -w, --warps <num> Number of warps\n"
" -t, --threads <num> Number of threads\n"
" -a, --arch <arch string> Architecture string\n"
" -r, --riscv riscv test\n"
" -s, --stats Print stats on exit.\n";
return 0;
}
@@ -64,16 +67,24 @@ int main(int argc, char **argv) {
}
bool running;
do {
running = false;
for (int i = 0; i < num_cores; ++i) {
if (!cores[i]->running())
continue;
running = true;
cores[i]->step();
for (auto& core : cores) {
core->step();
if (core->running())
running = true;
}
} while (running);
if (riscv_test) {
bool status = (1 == cores[0]->getIRegValue(3));
if (status) {
std::cout << "Passed." << std::endl;
} else {
std::cout << "Failed." << std::endl;
return -1;
}
}
return 0;
}

View File

@@ -90,11 +90,14 @@ void MemoryUnit::ADecoder::write(Addr a, Word w, bool /*sup*/, Size wordSize) {
throw BadAddress();
}
RAM *ram = (RAM *)ma.md;
if (wordSize == 8) {
switch (wordSize) {
case 1:
ram->writeByte(ma.addr, &w);
} else if (wordSize == 16) {
break;
case 2:
ram->writeHalf(ma.addr, &w);
} else {
break;
default:
ram->writeWord(ma.addr, &w);
}
}
@@ -247,7 +250,6 @@ void DiskControllerMemDevice::write(Addr a, Word w) {
RAM::RAM(uint32_t num_pages, uint32_t page_size)
: page_bits_(log2ceil(page_size)) {
assert(page_size >= 4);
assert(ispow2(page_size));
mem_.resize(num_pages, NULL);
uint64_t sizel = uint64_t(mem_.size()) << page_bits_;
@@ -272,15 +274,16 @@ Size RAM::size() const {
}
uint8_t *RAM::get(uint32_t address) {
uint32_t page_size = 14 << page_bits_;
uint32_t page_size = 1 << page_bits_;
uint32_t page_index = address >> page_bits_;
uint32_t byte_offset = address & ((1 << page_bits_) - 1);
uint8_t* &page = mem_.at(page_index);
if (page == NULL) {
uint8_t *ptr = new uint8_t[page_size];
for (uint32_t i = 0; i < (page_size / 4); ++i) {
((uint32_t*)ptr)[i] = 0xddccbbaa;
// set uninitialized data to "baadf00d"
for (uint32_t i = 0; i < page_size; ++i) {
ptr[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff;
}
page = ptr;
}

63
simX/pipeline.cpp Normal file
View File

@@ -0,0 +1,63 @@
#include <iostream>
#include "pipeline.h"
using namespace vortex;
namespace vortex {
// Dumps the state of a pipeline stage to `os`, one "<name>: <field>=<value>"
// line per field.
//
// `wid` is always written in decimal and `PC` in hexadecimal, regardless of
// the numeric base the stream was left in by earlier output. The stream's
// formatting flags are restored before returning, so the hex manipulator used
// for `PC` does not leak into whatever the caller prints next.
//
// Returns `os` to allow chaining.
std::ostream &operator<<(std::ostream &os, const Pipeline& pipeline) {
  // Remember the caller's formatting so it can be put back at the end.
  const std::ios_base::fmtflags saved_flags = os.flags();

  os << pipeline.name_ << ": valid=" << pipeline.valid << std::endl;
  os << pipeline.name_ << ": stalled=" << pipeline.stalled << std::endl;
  os << pipeline.name_ << ": stall_warp=" << pipeline.stall_warp << std::endl;
  os << pipeline.name_ << ": wid=" << std::dec << pipeline.wid << std::endl;
  os << pipeline.name_ << ": PC=" << std::hex << pipeline.PC << std::endl;
  // The register masks are bitsets and print as bit strings; the numeric base
  // does not affect them.
  os << pipeline.name_ << ": used_iregs=" << pipeline.used_iregs << std::endl;
  os << pipeline.name_ << ": used_fregs=" << pipeline.used_fregs << std::endl;
  os << pipeline.name_ << ": used_vregs=" << pipeline.used_vregs << std::endl;

  os.flags(saved_flags);
  return os;
}
}
// Builds a pipeline stage labelled `name` (the label is only used when the
// stage is printed) and puts it into the cleared state.
Pipeline::Pipeline(const char* name)
  : name_(name)
{
  clear();
}
void Pipeline::clear() {
valid = false;
stalled = false;
stall_warp = false;
wid = 0;
PC = 0;
used_iregs.reset();
used_fregs.reset();
used_vregs.reset();
}
// Decides whether this stage may do work this cycle.
//
// If `drain` is given and is stalled, the stall is propagated to this stage
// and false is returned. Otherwise `drain` (when given) is marked invalid,
// this stage's stall flag is cleared, and the result is whether this stage
// currently holds valid content.
bool Pipeline::enter(Pipeline *drain) {
  const bool blocked = (drain != nullptr) && drain->stalled;
  if (blocked) {
    stalled = true;
    return false;
  }

  if (drain != nullptr) {
    drain->valid = false;
  }

  stalled = false;
  return valid;
}
// Copies this stage's state (everything except its name) into `drain`.
// Does nothing when `drain` is null.
void Pipeline::next(Pipeline *drain) {
  if (drain == nullptr)
    return;

  Pipeline &dst = *drain;

  // Control flags.
  dst.valid      = valid;
  dst.stalled    = stalled;
  dst.stall_warp = stall_warp;

  // Instruction identity.
  dst.wid = wid;
  dst.PC  = PC;

  // Destination register and register-usage masks.
  dst.rdest      = rdest;
  dst.rdest_type = rdest_type;
  dst.used_iregs = used_iregs;
  dst.used_fregs = used_fregs;
  dst.used_vregs = used_vregs;
}

47
simX/pipeline.h Normal file
View File

@@ -0,0 +1,47 @@
#pragma once
#include <memory>
#include "debug.h"
#include "util.h"
namespace vortex {
class Instr;
// State carried by one named stage of the simulated pipeline. The fields are
// public and are filled in by the code that drives the stage (Warp::step
// writes valid, PC, rdest, rdest_type and the used_* masks).
class Pipeline {
public:
  // `name` is stored as-is (not copied) and only used when printing the stage.
  Pipeline(const char* name);

  // Resets the stage to its empty state.
  void clear();

  // Returns true when this stage holds valid content and `drain` (if given)
  // is not stalled; propagates the stall from `drain` otherwise.
  bool enter(Pipeline* drain);

  // Copies this stage's state into `drain` (no-op when `drain` is null).
  void next(Pipeline* drain);

  //--
  bool valid;       // stage currently holds an instruction

  //--
  bool stalled;     // stage is stalled; enter() propagates this upstream
  bool stall_warp;  // NOTE(review): presumably requests stalling the issuing warp - confirm

  //--
  int wid;          // NOTE(review): presumably the warp id of the instruction - confirm
  Word PC;          // program counter of the instruction in this stage

  //--
  // Destination register of the instruction. rdest_type follows the values
  // written by the Instr setters: 1 = integer, 2 = floating-point, 3 = vector.
  // Both default to 0 so a stage never carries indeterminate values.
  int rdest_type = 0;
  int rdest = 0;

  // One bit per register touched by the instruction, per register file.
  RegMask used_iregs;
  RegMask used_fregs;
  RegMask used_vregs;

private:
  const char* name_;

  friend std::ostream &operator<<(std::ostream &, const Pipeline&);
};
}

View File

@@ -1,16 +1,14 @@
#!/bin/bash
set -e
make
make -C ../runtime/tests/dev
make -C ../runtime/tests/hello
make -C ../runtime/tests/nlTest
make -C ../runtime/tests/simple
echo start > results.txt
printf "Fasten your seatbelts ladies and gentelmen!!\n\n\n\n"
#./simX -a rv32i -i ../runtime/tests/dev/vx_dev_main.hex -s 1> emulator.debug
#./simX -a rv32i -i ../runtime/tests/hello/hello.hex -s 1> emulator.debug
./simX -a rv32i -i ../runtime/tests/nlTest/vx_nl_main.hex -s 1> emulator.debug
./simX -a rv32i -i ../runtime/tests/simple/vx_simple_main.hex -s 1> emulator.debug
./simX -a rv32i -i ../runtime/tests/dev/vx_dev_main.hex
./simX -a rv32i -i ../runtime/tests/hello/hello.hex
./simX -a rv32i -i ../runtime/tests/nlTest/vx_nl_main.hex
./simX -a rv32i -i ../runtime/tests/simple/vx_simple_main.hex

38
simX/test_rv32f.sh Executable file
View File

@@ -0,0 +1,38 @@
#!/bin/bash
# Builds simX and runs the rv32uf (single-precision floating-point) riscv-tests
# ISA images in riscv-test mode (-r). In that mode simX prints Passed./Failed.
# and returns non-zero on failure, so with `set -e` the script stops at the
# first failing test.
#
# The echoed path and the executed image are derived from the same test name,
# so the printed label always matches the image that is actually run.

set -e

make

ISA_DIR=../benchmarks/riscv_tests/isa

for test_name in fadd fmadd fmin fcmp ldst fcvt fcvt_w move recoding fdiv fclass; do
  hex_file="$ISA_DIR/rv32uf-p-$test_name.hex"
  echo "$hex_file"
  ./simX -a rv32i -r -i "$hex_file"
done

143
simX/test_rv32i.sh Executable file
View File

@@ -0,0 +1,143 @@
#!/bin/bash
# Builds simX and runs the rv32ui (base integer) and rv32um (multiply/divide)
# riscv-tests ISA images in riscv-test mode (-r), stopping at the first
# failure because of `set -e`.

set -e

make

isa_dir=../benchmarks/riscv_tests/isa

ui_tests="add addi and andi auipc beq bge bgeu blt bltu bne jal jalr
          lb lbu lh lhu lui lw or ori sb sh simple sll slli slt slti
          sltiu sltu sra srai srl srli sub sw xor xori"

um_tests="div divu mul mulh mulhsu mulhu rem remu"

# Prints the image path (with a leading "./") and runs it through simX.
run_test() {
  echo "./$1"
  ./simX -a rv32i -r -i "$1"
}

for name in $ui_tests; do
  run_test "$isa_dir/rv32ui-p-$name.hex"
done

for name in $um_tests; do
  run_test "$isa_dir/rv32um-p-$name.hex"
done

View File

@@ -8,10 +8,13 @@ namespace vortex {
typedef uint8_t Byte;
typedef uint32_t Word;
typedef int32_t WordI;
typedef uint32_t Addr;
typedef uint32_t Size;
typedef std::bitset<32> RegMask;
typedef std::bitset<32> ThreadMask;
typedef std::bitset<32> WarpMask;

View File

@@ -1,6 +1,9 @@
#include <fcntl.h>
#include <math.h>
#include <climits>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <vector>
#include "types.h"
#include "util.h"
@@ -76,3 +79,102 @@ void vortex::writeWord(std::vector<Byte> &p, Size &n, Size wordSize, Word w) {
w >>= 8;
}
}
// Reinterprets the 32-bit pattern held in an integer register as an IEEE-754
// single-precision value.
//
// The bits are copied verbatim into a float, so every encoding round-trips:
// normal numbers, signed zeros, subnormals (exponent field 0 with a non-zero
// fraction, which scale by 2^-126), infinities and NaNs (exponent field 0xFF).
//
// Assumes the host `float` is a 32-bit IEEE-754 binary32; the size part of
// that assumption is checked at compile time.
float vortex::intregToFloat(uint32_t input) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float must be 32 bits wide");
  float value;
  // memcpy is the well-defined way to reinterpret an object representation.
  std::memcpy(&value, &input, sizeof(value));
  return value;
}
// Returns the IEEE-754 single-precision bit pattern of `in_value`, so that a
// float can be stored in a 32-bit integer register. Inverse of intregToFloat.
//
// The bits are copied verbatim; no rounding or canonicalisation is applied.
// Assumes the host `float` is a 32-bit IEEE-754 binary32; the size part of
// that assumption is checked at compile time.
uint32_t vortex::floatToBin(float in_value) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float must be 32 bits wide");
  uint32_t bits;
  // memcpy avoids reading an inactive union member and needs no intermediate
  // string conversion.
  std::memcpy(&bits, &in_value, sizeof(bits));
  return bits;
}
// Classifies a single-precision bit pattern as NaN.
// Encoding reference:
// https://en.wikipedia.org/wiki/Single-precision_floating-point_format
//
// Returns:
//   0 - not a NaN
//   1 - quiet NaN     (exponent all ones, fraction != 0, fraction MSB set)
//   2 - signaling NaN (exponent all ones, fraction != 0, fraction MSB clear)
//
// The sign bit does not take part in the classification: a NaN with the sign
// bit set and the fraction MSB set is still a quiet NaN.
uint8_t vortex::fpBinIsNan(uint32_t din) {
  const uint32_t expo     = (din >> 23) & 0x000000FF;
  const uint32_t fraction = din & 0x007FFFFF;

  if (expo != 0xFF || fraction == 0)
    return 0; // finite value or infinity

  // Bit 22 is the most significant fraction bit (the "quiet" bit).
  const bool quiet = (din & 0x00400000) != 0;
  return quiet ? 1 : 2;
}
// Classifies a single-precision bit pattern as zero.
//
// Returns:
//   0 - not zero
//   1 - negative zero (sign bit set)
//   2 - positive zero (sign bit clear)
uint8_t vortex::fpBinIsZero(uint32_t din) {
  // A zero has every exponent and fraction bit clear, i.e. everything but
  // the sign bit.
  const uint32_t magnitude = din & 0x7FFFFFFF;
  if (magnitude != 0)
    return 0;

  const bool negative = (din & 0x80000000) != 0;
  return negative ? 1 : 2;
}
// Classifies a single-precision bit pattern as infinity.
//
// Returns:
//   0 - not infinity
//   1 - negative infinity (sign bit set)
//   2 - positive infinity (sign bit clear)
uint8_t vortex::fpBinIsInf(uint32_t din) {
  // Infinity: exponent field all ones (0xFF) and fraction all zeros.
  const uint32_t magnitude = din & 0x7FFFFFFF;
  if (magnitude != 0x7F800000)
    return 0;

  const bool negative = (din >> 31) != 0;
  return negative ? 1 : 2;
}

View File

@@ -30,4 +30,19 @@ Word readWord(const std::vector<Byte> &b, Size &n, Size wordSize);
void writeByte(std::vector<Byte> &p, Size &n, Byte b);
void writeWord(std::vector<Byte> &p, Size &n, Size wordSize, Word w);
// Convert 32-bit integer register file to IEEE-754 floating point number.
float intregToFloat(uint32_t input);
// Convert a floating point number to IEEE-754 32-bit representation
uint32_t floatToBin(float in_value);
// check floating-point number in binary format is NaN
uint8_t fpBinIsNan(uint32_t din);
// check floating-point number in binary format is zero
uint8_t fpBinIsZero(uint32_t din);
// check floating-point number in binary format is infinity
uint8_t fpBinIsInf(uint32_t din);
}

View File

@@ -12,53 +12,72 @@ using namespace vortex;
Warp::Warp(Core *core, Word id)
: id_(id)
, active_(false)
, core_(core)
, PC_(0x80000000)
, steps_(0)
, insts_(0)
, loads_(0)
, stores_(0) {
tmask_.reset();
, core_(core) {
iRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
fRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
vRegFile_.resize(core_->arch().num_regs(), std::vector<Byte>(core_->arch().vsize(), 0));
vRegFile_.resize(core_->arch().num_regs(), std::vector<Byte>(core_->arch().vsize(), 0));
this->clear();
}
void Warp::step(trace_inst_t *trace_inst) {
// Puts the warp back into its startup state: inactive, no threads enabled in
// the thread mask, and the program counter at STARTUP_ADDR.
void Warp::clear() {
  active_ = false;
  tmask_.reset();
  PC_ = STARTUP_ADDR;
}
// Advances this warp by one instruction: fetches the word at PC_ through the
// core's instruction cache, decodes it, records the instruction's PC,
// destination register and register usage in `pipeline`, then executes it.
// At least one thread must be enabled in the thread mask (asserted).
// NOTE(review): this span interleaves two revisions of the function (a
// trace_inst_t-based one and a Pipeline-based one; `fetched` and `instr` are
// each declared twice), and lines are elided at the hunk marker near the end.
// Confirm against the real file before editing.
void Warp::step(Pipeline *pipeline) {
assert(tmask_.any());
Size fetchPos(0);
Size decPos;
Size wordSize(core_->arch().wsize());
std::vector<Byte> fetchBuffer(wordSize);
D(3, "Step: wid=" << id_ << ", PC=0x" << std::hex << PC_);
++steps_;
/* Fetch and decode. */
D(3, "current PC=0x" << std::hex << PC_);
Word fetched = core_->icache_fetch(PC_, 0);
auto instr = core_->decoder().decode(fetched);
// std::cout << "PC: " << std::hex << PC << "\n";
trace_inst->PC = PC_;
// Update pipeline
// Publish the decoded instruction's PC and destination register, and start
// from empty register-usage sets; they are filled in below.
pipeline->valid = true;
pipeline->PC = PC_;
pipeline->rdest = instr->getRDest();
pipeline->rdest_type = instr->getRDType();
pipeline->used_iregs.reset();
pipeline->used_fregs.reset();
pipeline->used_vregs.reset();
/* Fetch and decode. */
// Truncate PC_ to the architecture word size when the word is narrower than
// the PC_ storage type.
if (wordSize < sizeof(PC_))
PC_ &= ((1ll << (wordSize * 8)) - 1);
unsigned fetchSize = 4;
fetchBuffer.resize(fetchSize);
Word fetched = core_->icache_fetch(PC_ + fetchPos, 0);
writeWord(fetchBuffer, fetchPos, fetchSize, fetched);
decPos = 0;
std::shared_ptr<Instr> instr = core_->decoder().decode(fetchBuffer, decPos, trace_inst);
// Update PC
PC_ += decPos;
// Mark the destination register as used in the set selected by its type
// code: 1 -> used_iregs, 2 -> used_fregs, 3 -> used_vregs. Any other type
// code marks nothing.
switch (pipeline->rdest_type) {
case 1:
pipeline->used_iregs[pipeline->rdest] = 1;
break;
case 2:
pipeline->used_fregs[pipeline->rdest] = 1;
break;
case 3:
pipeline->used_vregs[pipeline->rdest] = 1;
break;
default:
break;
}
// Do the same for every source operand of the instruction, using the same
// type-code mapping as for the destination above.
for (int i = 0; i < instr->getNRSrc(); ++i) {
int type = instr->getRSType(i);
int reg = instr->getRSrc(i);
switch (type) {
case 1:
pipeline->used_iregs[reg] = 1;
break;
case 2:
pipeline->used_fregs[reg] = 1;
break;
case 3:
pipeline->used_vregs[reg] = 1;
break;
default:
break;
}
}
// Execute
this->execute(*instr, trace_inst);
this->execute(*instr, pipeline);
// At Debug Level 3, print debug info after each instruction.
// NOTE(review): the comment above says level 3, but the call below passes 4
// as its first argument - confirm which level is intended.
D(4, "Register state:");
@@ -74,11 +93,4 @@ void Warp::step(trace_inst_t *trace_inst) {
// Emit one thread-mask bit per thread, then a newline, via DPN(3, ...).
for (int i = 0; i < core_->arch().num_threads(); ++i)
DPN(3, " " << tmask_[i]);
DPN(3, "\n");
}
// Writes this warp's step, instruction, load and store counters to standard
// output, one labelled counter per line (each line is flushed by std::endl).
void Warp::printStats() const {
  std::ostream &out = std::cout;
  out << "Steps : " << steps_ << std::endl;
  out << "Insts : " << insts_ << std::endl;
  out << "Loads : " << loads_ << std::endl;
  out << "Stores: " << stores_ << std::endl;
}

View File

@@ -7,6 +7,9 @@
namespace vortex {
class Core;
class Instr;
class Pipeline;
struct DomStackEntry {
DomStackEntry(const ThreadMask &tmask, Word PC)
: tmask(tmask)
@@ -34,14 +37,11 @@ struct vtype {
int vsew;
int vlmul;
};
class Core;
class Instr;
class trace_inst_t;
class Warp {
public:
Warp(Core *core, Word id = 0);
Warp(Core *core, Word id);
void clear();
bool active() const {
return active_;
@@ -57,12 +57,6 @@ public:
return 0;
}
void printStats() const;
// Accessor for the Core pointer this warp was constructed with.
Core *core() { return core_; }
// Accessor for the identifier this warp was constructed with.
Word id() const { return id_; }
@@ -80,11 +74,15 @@ public:
active_ = tmask_.any();
}
void step(trace_inst_t *);
// Reads integer register `reg` from row 0 of the integer register file (the
// file is laid out as [thread][register], so row 0 is the first thread).
// `reg` is used as a plain index; no range check is performed here.
Word getIRegValue(int reg) const {
  const auto &row0 = iRegFile_[0];
  return row0[reg];
}
void step(Pipeline *);
private:
void execute(Instr &instr, trace_inst_t *);
void execute(const Instr &instr, Pipeline *);
Word id_;
bool active_;
@@ -100,11 +98,6 @@ private:
struct vtype vtype_;
int vl_;
unsigned long steps_;
unsigned long insts_;
unsigned long loads_;
unsigned long stores_;
};
}