From 8eac091fb5f0ea81863401c67c6035951a9fad62 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 8 Mar 2021 03:44:08 -0800 Subject: [PATCH] simX floating-point fixes and refactoring --- driver/simx/Makefile | 8 +- driver/simx/vortex.cpp | 19 +- simX/Makefile | 2 +- simX/core.cpp | 510 +++---- simX/core.h | 73 +- simX/decode.cpp | 505 ++++--- simX/decode.h | 5 +- simX/execute.cpp | 2958 ++++++++++++++++++---------------------- simX/instr.h | 101 +- simX/main.cpp | 23 +- simX/mem.cpp | 17 +- simX/pipeline.cpp | 63 + simX/pipeline.h | 47 + simX/test_runtime.sh | 14 +- simX/test_rv32f.sh | 38 + simX/test_rv32i.sh | 143 ++ simX/types.h | 3 + simX/util.cpp | 102 ++ simX/util.h | 15 + simX/warp.cpp | 96 +- simX/warp.h | 31 +- 21 files changed, 2425 insertions(+), 2348 deletions(-) create mode 100644 simX/pipeline.cpp create mode 100644 simX/pipeline.h create mode 100755 simX/test_rv32f.sh create mode 100755 simX/test_rv32i.sh diff --git a/driver/simx/Makefile b/driver/simx/Makefile index 1aa7017e..dfd8f1a6 100644 --- a/driver/simx/Makefile +++ b/driver/simx/Makefile @@ -12,8 +12,8 @@ CXXFLAGS += -DDUMP_PERF_STATS #CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1 +#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1 CXXFLAGS += $(CONFIGS) @@ -21,11 +21,11 @@ LDFLAGS += -shared -pthread #LDFLAGS += -dynamiclib -pthread SRCS = vortex.cpp ../common/vx_utils.cpp -SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp +SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/pipeline.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp # Debugigng ifdef DEBUG - CXXFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) + 
CXXFLAGS += $(DBG_FLAGS) -DUSE_DEBUG=3 else CXXFLAGS += -DNDEBUG endif diff --git a/driver/simx/vortex.cpp b/driver/simx/vortex.cpp index 6234fdff..95b57370 100644 --- a/driver/simx/vortex.cpp +++ b/driver/simx/vortex.cpp @@ -70,6 +70,7 @@ public: , is_running_(false) , thread_(__thread_proc__, this) , ram_((1<<12), (1<<20)) { + mem_allocation_ = ALLOC_BASE_ADDR; mmu_.attach(ram_, 0, 0xffffffff); for (int i = 0; i < arch_.num_cores(); ++i) { @@ -100,12 +101,13 @@ public: if (dest_addr + asize > ram_.size()) return -1; + ram_.write(dest_addr, asize, (uint8_t*)src + src_offset); + /*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr); for (int i = 0; i < size; i += 4) { printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i)); }*/ - ram_.write(dest_addr, asize, (uint8_t*)src + src_offset); return 0; } @@ -127,7 +129,10 @@ public: int start() { mutex_.lock(); - is_running_ = true; + for (int i = 0; i < arch_.num_cores(); ++i) { + cores_[i]->clear(); + } + is_running_ = true; mutex_.unlock(); return 0; @@ -162,14 +167,12 @@ private: void run() { bool running; - int num_cores = cores_.at(0)->arch().num_cores(); do { running = false; - for (int i = 0; i < num_cores; ++i) { - if (!cores_[i]->running()) - continue; - running = true; - cores_[i]->step(); + for (auto& core : cores_) { + core->step(); + if (core->running()) + running = true; } } while (running); } diff --git a/simX/Makefile b/simX/Makefile index 37d7ad04..5477dbdf 100644 --- a/simX/Makefile +++ b/simX/Makefile @@ -13,7 +13,7 @@ RTL_DIR = ../hw/rtl PROJECT = simX -SRCS = util.cpp args.cpp mem.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp +SRCS = util.cpp args.cpp mem.cpp pipeline.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp # Debugigng ifdef DEBUG diff --git a/simX/core.cpp b/simX/core.cpp index d6e8c8ee..ebeb2b13 100644 --- a/simX/core.cpp +++ b/simX/core.cpp @@ -10,119 +10,77 @@ #include "core.h" #include "debug.h" -#define 
INIT_TRACE(trace_inst) \ - trace_inst.valid = false; \ - trace_inst.PC = 0; \ - trace_inst.wid = schedule_w_; \ - trace_inst.irs1 = -1; \ - trace_inst.irs2 = -1; \ - trace_inst.frs1 = -1; \ - trace_inst.frs2 = -1; \ - trace_inst.frs3 = -1; \ - trace_inst.frd = -1; \ - trace_inst.ird = -1; \ - trace_inst.vrs1 = -1; \ - trace_inst.vrs2 = -1; \ - trace_inst.vrd = -1; \ - trace_inst.is_lw = false; \ - trace_inst.is_sw = false; \ - if (trace_inst.mem_addresses != NULL) \ - free(trace_inst.mem_addresses); \ - trace_inst.mem_addresses = (unsigned *)malloc(32 * sizeof(unsigned)); \ - for (int tid = 0; tid < arch_.num_threads(); tid++) \ - trace_inst.mem_addresses[tid] = 0xdeadbeef; \ - trace_inst.mem_stall_cycles = 0; \ - trace_inst.fetch_stall_cycles = 0; \ - trace_inst.stall_warp = false; \ - trace_inst.wspawn = false; \ - trace_inst.stalled = false; - -#define CPY_TRACE(drain, source) \ - drain.valid = source.valid; \ - drain.PC = source.PC; \ - drain.wid = source.wid; \ - drain.irs1 = source.irs1; \ - drain.irs2 = source.irs2; \ - drain.ird = source.ird; \ - drain.frs1 = source.frs1; \ - drain.frs2 = source.frs2; \ - drain.frs3 = source.frs3; \ - drain.frd = source.frd; \ - drain.vrs1 = source.vrs1; \ - drain.vrs2 = source.vrs2; \ - drain.vrd = source.vrd; \ - drain.is_lw = source.is_lw; \ - drain.is_sw = source.is_sw; \ - for (int tid = 0; tid < arch_.num_threads(); tid++) \ - drain.mem_addresses[tid] = source.mem_addresses[tid]; \ - drain.mem_stall_cycles = source.mem_stall_cycles; \ - drain.fetch_stall_cycles = source.fetch_stall_cycles; \ - drain.stall_warp = source.stall_warp; \ - drain.wspawn = source.wspawn; \ - drain.stalled = false; - using namespace vortex; -void printTrace(trace_inst_t *trace, const char *stage_name) { - __unused(trace, stage_name); - D(4, stage_name << ": valid=" << trace->valid); - D(4, stage_name << ": PC=" << std::hex << trace->PC << std::dec); - D(4, stage_name << ": wid=" << trace->wid); - D(4, stage_name << ": rd=" << trace->ird << ", 
rs1=" << trace->irs1 << ", trs2=" << trace->irs2); - D(4, stage_name << ": is_lw=" << trace->is_lw); - D(4, stage_name << ": is_sw=" << trace->is_sw); - D(4, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles); - D(4, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles); - D(4, stage_name << ": stall_warp=" << trace->stall_warp); - D(4, stage_name << ": wspawn=" << trace->wspawn); - D(4, stage_name << ": stalled=" << trace->stalled); -} - Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id) : id_(id) , arch_(arch) , decoder_(decoder) , mem_(mem) , shared_mem_(1, SMEM_SIZE) - , steps_(0) - , num_insts_(0) { + , inst_in_schedule_("schedule") + , inst_in_fetch_("fetch") + , inst_in_decode_("decode") + , inst_in_issue_("issue") + , inst_in_execute_("execute") + , inst_in_writeback_("writeback") { + in_use_iregs_.resize(arch.num_warps(), 0); + in_use_fregs_.resize(arch.num_warps(), 0); + in_use_vregs_.reset(); - foundSchedule_ = true; - schedule_w_ = 0; + csrs_.resize(arch_.num_csrs(), 0); - memset(&inst_in_fetch_, 0, sizeof(inst_in_fetch_)); - memset(&inst_in_decode_, 0, sizeof(inst_in_decode_)); - memset(&inst_in_scheduler_, 0, sizeof(inst_in_scheduler_)); - memset(&inst_in_exe_, 0, sizeof(inst_in_exe_)); - memset(&inst_in_lsu_, 0, sizeof(inst_in_lsu_)); - memset(&inst_in_wb_, 0, sizeof(inst_in_wb_)); - - INIT_TRACE(inst_in_fetch_); - INIT_TRACE(inst_in_decode_); - INIT_TRACE(inst_in_scheduler_); - INIT_TRACE(inst_in_exe_); - INIT_TRACE(inst_in_lsu_); - INIT_TRACE(inst_in_wb_); - - iRenameTable_.resize(arch.num_warps(), std::vector(arch.num_regs(), false)); - fRenameTable_.resize(arch.num_warps(), std::vector(arch.num_regs(), false)); - vRenameTable_.resize(arch.num_regs(), false); - - csrs_.resize(arch_.num_csrs()); + fcsrs_.resize(arch_.num_warps(), 0); barriers_.resize(arch_.num_barriers(), 0); - stalled_warps_.resize(arch.num_warps(), false); - + warps_.resize(arch_.num_warps()); for (int i = 0; i < 
arch_.num_warps(); ++i) { - warps_.emplace_back(this, i); + warps_[i] = std::make_shared(this, i); } - warps_[0].setTmask(0, true); + this->clear(); } -Core::~Core() { - //-- +void Core::clear() { + for (int w = 0; w < arch_.num_warps(); ++w) { + in_use_iregs_[w].reset(); + in_use_fregs_[w].reset(); + } + stalled_warps_.reset(); + + in_use_vregs_.reset(); + + for (auto& csr : csrs_) { + csr = 0; + } + + for (auto& fcsr : fcsrs_) { + fcsr = 0; + } + + for (auto& barrier : barriers_) { + barrier.reset(); + } + + for (auto warp : warps_) { + warp->clear(); + } + + inst_in_schedule_.clear(); + inst_in_fetch_.clear(); + inst_in_decode_.clear(); + inst_in_issue_.clear(); + inst_in_execute_.clear(); + inst_in_writeback_.clear(); + + steps_ = 0; + insts_ = 0; + loads_ = 0; + stores_ = 0; + + inst_in_schedule_.valid = true; + warps_[0]->setTmask(0, true); } void Core::step() { @@ -138,243 +96,152 @@ void Core::step() { DPN(3, "\n"); this->writeback(); - this->load_store(); - this->execute_unit(); - this->scheduler(); + this->execute(); + this->issue(); this->decode(); this->fetch(); + this->schedule(); DPN(3, std::flush); } -void Core::warpScheduler() { - foundSchedule_ = false; - int next_warp = schedule_w_; +void Core::schedule() { + if (!inst_in_schedule_.enter(&inst_in_fetch_)) + return; + + bool foundSchedule = false; + int scheduled_warp = inst_in_schedule_.wid; + for (size_t wid = 0; wid < warps_.size(); ++wid) { // round robin scheduling - next_warp = (next_warp + 1) % warps_.size(); - bool is_active = warps_[next_warp].active(); - bool stalled = stalled_warps_[next_warp]; + scheduled_warp = (scheduled_warp + 1) % warps_.size(); + bool is_active = warps_[scheduled_warp]->active(); + bool stalled = stalled_warps_[scheduled_warp]; if (is_active && !stalled) { - foundSchedule_ = true; + foundSchedule = true; break; } } - schedule_w_ = next_warp; + + if (!foundSchedule) + return; + + D(3, "Schedule: wid=" << scheduled_warp); + inst_in_schedule_.wid = scheduled_warp; + + 
// advance pipeline + inst_in_schedule_.next(&inst_in_fetch_); } void Core::fetch() { - if ((!inst_in_scheduler_.stalled) - && (inst_in_fetch_.fetch_stall_cycles == 0)) { - INIT_TRACE(inst_in_fetch_); + if (!inst_in_fetch_.enter(&inst_in_issue_)) + return; - if (foundSchedule_) { - auto active_threads_b = warps_[schedule_w_].getActiveThreads(); - num_insts_ = num_insts_ + warps_[schedule_w_].getActiveThreads(); + int wid = inst_in_fetch_.wid; + + auto active_threads_b = warps_[wid]->getActiveThreads(); + warps_[wid]->step(&inst_in_fetch_); + auto active_threads_a = warps_[wid]->getActiveThreads(); - warps_[schedule_w_].step(&inst_in_fetch_); - - auto active_threads_a = warps_[schedule_w_].getActiveThreads(); - if (active_threads_b != active_threads_a) { - D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a); - } - - this->getCacheDelays(&inst_in_fetch_); - - if (inst_in_fetch_.stall_warp) { - stalled_warps_[inst_in_fetch_.wid] = true; - } - } - this->warpScheduler(); - } else { - inst_in_fetch_.stalled = false; - if (inst_in_fetch_.fetch_stall_cycles > 0) - --inst_in_fetch_.fetch_stall_cycles; + insts_ += active_threads_b; + if (active_threads_b != active_threads_a) { + D(3, "** warp #" << wid << " active threads changed from " << active_threads_b << " to " << active_threads_a); } - printTrace(&inst_in_fetch_, "Fetch"); + if (inst_in_fetch_.stall_warp) { + D(3, "** warp #" << wid << " stalled"); + stalled_warps_[wid] = true; + } + + D(4, inst_in_fetch_); + + // advance pipeline + inst_in_fetch_.next(&inst_in_issue_); } void Core::decode() { - if ((inst_in_fetch_.fetch_stall_cycles == 0) - && !inst_in_scheduler_.stalled) { - CPY_TRACE(inst_in_decode_, inst_in_fetch_); - INIT_TRACE(inst_in_fetch_); - } -} - -void Core::scheduler() { - if (!inst_in_scheduler_.stalled) { - CPY_TRACE(inst_in_scheduler_, inst_in_decode_); - INIT_TRACE(inst_in_decode_); - } -} - -void Core::load_store() { - if 
((inst_in_lsu_.mem_stall_cycles > 0) || inst_in_lsu_.stalled) { - // LSU currently busy - if ((inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)) { - inst_in_scheduler_.stalled = true; - } - } else { - if (!inst_in_scheduler_.is_lw && !inst_in_scheduler_.is_sw) - return; - - // Scheduler has LSU inst - bool scheduler_srcs_busy = false; - - if (inst_in_scheduler_.irs1 > 0) { - scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1]; - } - - if (inst_in_scheduler_.irs2 > 0) { - scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2]; - } - - if (inst_in_scheduler_.frs1 >= 0) { - scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1]; - } - - if (inst_in_scheduler_.frs2 >= 0) { - scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2]; - } - - if (inst_in_scheduler_.frs3 >= 0) { - scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3]; - } - - if (inst_in_scheduler_.vrs1 >= 0) { - scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1]; - } - if (inst_in_scheduler_.vrs2 >= 0) { - scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2]; - } - - if (scheduler_srcs_busy) { - inst_in_scheduler_.stalled = true; - } else { - if (inst_in_scheduler_.ird > 0) - iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true; - - if (inst_in_scheduler_.frd >= 0) - fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true; - - if (inst_in_scheduler_.vrd >= 0) - vRenameTable_[inst_in_scheduler_.vrd] = true; - - CPY_TRACE(inst_in_lsu_, inst_in_scheduler_); - INIT_TRACE(inst_in_scheduler_); - } - } - - if (inst_in_lsu_.mem_stall_cycles > 0) - inst_in_lsu_.mem_stall_cycles--; -} - -void Core::execute_unit() { - if (inst_in_scheduler_.is_lw || 
inst_in_scheduler_.is_sw) + if (!inst_in_decode_.enter(&inst_in_issue_)) return; - bool scheduler_srcs_busy = false; + // advance pipeline + inst_in_decode_.next(&inst_in_issue_); +} - if (inst_in_scheduler_.irs1 > 0) { - scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1]; +void Core::issue() { + if (!inst_in_issue_.enter(&inst_in_execute_)) + return; + + bool in_use_regs = (inst_in_issue_.used_iregs & in_use_iregs_[inst_in_issue_.wid]) != 0 + || (inst_in_issue_.used_fregs & in_use_fregs_[inst_in_issue_.wid]) != 0 + || (inst_in_issue_.used_vregs & in_use_vregs_) != 0; + + if (in_use_regs) { + D(3, "Issue: registers not ready!"); + inst_in_issue_.stalled = true; + return; + } + + switch (inst_in_issue_.rdest_type) { + case 1: + if (inst_in_issue_.rdest) + in_use_iregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1; + break; + case 2: + in_use_fregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1; + break; + case 3: + in_use_vregs_[inst_in_issue_.rdest] = 1; + break; + default: + break; } - if (inst_in_scheduler_.irs2 > 0) { - scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2]; - } + // advance pipeline + inst_in_issue_.next(&inst_in_execute_); +} - if (inst_in_scheduler_.frs1 >= 0) { - scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1]; - } +void Core::execute() { + if (!inst_in_execute_.enter(&inst_in_writeback_)) + return; - if (inst_in_scheduler_.frs2 >= 0) { - scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2]; - } - - if (inst_in_scheduler_.frs3 >= 0) { - scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3]; - } - - if (inst_in_scheduler_.vrs1 >= 0) { - scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1]; - } - - if (inst_in_scheduler_.vrs2 >= 0) { - 
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2]; - } - - if (scheduler_srcs_busy) { - D(3, "Execute: srcs not ready!"); - inst_in_scheduler_.stalled = true; - } else { - if (inst_in_scheduler_.ird > 0) { - iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true; - } - - if (inst_in_scheduler_.frd >= 0) { - fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true; - } - - if (inst_in_scheduler_.vrd >= 0) { - vRenameTable_[inst_in_scheduler_.vrd] = true; - } - - CPY_TRACE(inst_in_exe_, inst_in_scheduler_); - INIT_TRACE(inst_in_scheduler_); - } + // advance pipeline + inst_in_execute_.next(&inst_in_writeback_); } void Core::writeback() { - if (inst_in_wb_.ird > 0) { - iRenameTable_[inst_in_wb_.wid][inst_in_wb_.ird] = false; + if (!inst_in_writeback_.enter(NULL)) + return; + + switch (inst_in_writeback_.rdest_type) { + case 1: + in_use_iregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0; + break; + case 2: + in_use_fregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0; + break; + case 3: + in_use_vregs_[inst_in_writeback_.rdest] = 0; + break; + default: + break; } - if (inst_in_wb_.frd >= 0) { - fRenameTable_[inst_in_wb_.wid][inst_in_wb_.frd] = false; + if (inst_in_writeback_.stall_warp) { + stalled_warps_[inst_in_writeback_.wid] = 0; } - if (inst_in_wb_.vrd >= 0) { - vRenameTable_[inst_in_wb_.vrd] = false; - } - - if (inst_in_wb_.stall_warp) { - stalled_warps_[inst_in_wb_.wid] = false; - } - - INIT_TRACE(inst_in_wb_); - - bool serviced_exe = false; - if ((inst_in_exe_.ird > 0) - || (inst_in_exe_.frd >= 0) - || (inst_in_exe_.vrd >= 0) - || (inst_in_exe_.stall_warp)) { - CPY_TRACE(inst_in_wb_, inst_in_exe_); - INIT_TRACE(inst_in_exe_); - serviced_exe = true; - } - - if (inst_in_lsu_.is_sw) { - INIT_TRACE(inst_in_lsu_); - } else { - if (((inst_in_lsu_.ird > 0) - || (inst_in_lsu_.frd >= 0) - || (inst_in_lsu_.vrd >= 0)) - && (inst_in_lsu_.mem_stall_cycles == 0)) { - if (serviced_exe) { - // 
Stalling LSU because EXE is busy - inst_in_lsu_.stalled = true; - } else { - CPY_TRACE(inst_in_wb_, inst_in_lsu_); - INIT_TRACE(inst_in_lsu_); - } - } - } + // advance pipeline + inst_in_writeback_.next(NULL); } Word Core::get_csr(Addr addr, int tid, int wid) { - if (addr == CSR_WTID) { + if (addr == CSR_FFLAGS) { + return fcsrs_.at(wid) & 0x1F; + } else if (addr == CSR_FRM) { + return (fcsrs_.at(wid) >> 5); + } else if (addr == CSR_FCSR) { + return fcsrs_.at(wid); + } else if (addr == CSR_WTID) { // Warp threadID return tid; } else if (addr == CSR_LTID) { @@ -404,10 +271,10 @@ Word Core::get_csr(Addr addr, int tid, int wid) { return arch_.num_cores(); } else if (addr == CSR_INSTRET) { // NumInsts - return num_insts_; + return insts_; } else if (addr == CSR_INSTRET_H) { // NumInsts - return (Word)(num_insts_ >> 32); + return (Word)(insts_ >> 32); } else if (addr == CSR_CYCLE) { // NumCycles return (Word)steps_; @@ -419,8 +286,16 @@ Word Core::get_csr(Addr addr, int tid, int wid) { } } -void Core::set_csr(Addr addr, Word value) { - csrs_.at(addr) = value; +void Core::set_csr(Addr addr, Word value, int /*tid*/, int wid) { + if (addr == CSR_FFLAGS) { + fcsrs_.at(wid) = (fcsrs_.at(wid) & ~0x1F) | (value & 0x1F); + } else if (addr == CSR_FRM) { + fcsrs_.at(wid) = (fcsrs_.at(wid) & ~0xE0) | ((value & 0x7) << 5); /* frm is the 3-bit field fcsr[7:5]; mask so stray high bits of value cannot leak above bit 7 */ + } else if (addr == CSR_FCSR) { + fcsrs_.at(wid) = value & 0xff; + } else { + csrs_.at(addr) = value; + } } void Core::barrier(int bar_id, int count, int warp_id) { @@ -430,7 +305,7 @@ void Core::barrier(int bar_id, int count, int warp_id) { return; for (int i = 0; i < arch_.num_warps(); ++i) { if (barrier.test(i)) { - warps_.at(i).activate(); + warps_.at(i)->activate(); } } barrier.reset(); @@ -441,6 +316,7 @@ Word Core::icache_fetch(Addr addr, bool sup) { } Word Core::dcache_read(Addr addr, bool sup) { + ++loads_; #ifdef SM_ENABLE if ((addr >= (SHARED_MEM_BASE_ADDR - SMEM_SIZE)) && ((addr + 4) <= SHARED_MEM_BASE_ADDR)) { @@ -451,6 +327,7 @@ Word Core::dcache_read(Addr
addr, bool sup) { } void Core::dcache_write(Addr addr, Word data, bool sup, Size size) { + ++stores_; #ifdef SM_ENABLE if ((addr >= (SHARED_MEM_BASE_ADDR - SMEM_SIZE)) && ((addr + 4) <= SHARED_MEM_BASE_ADDR)) { @@ -461,36 +338,17 @@ void Core::dcache_write(Addr addr, Word data, bool sup, Size size) { mem_.write(addr, data, sup, size); } -void Core::getCacheDelays(trace_inst_t *trace_inst) { - trace_inst->fetch_stall_cycles += 1; - if (trace_inst->is_sw || trace_inst->is_lw) { - trace_inst->mem_stall_cycles += 3; - } -} - bool Core::running() const { - bool stages_have_valid = inst_in_fetch_.valid - || inst_in_decode_.valid - || inst_in_scheduler_.valid - || inst_in_lsu_.valid - || inst_in_exe_.valid - || inst_in_wb_.valid; - - if (stages_have_valid) - return true; - - for (unsigned i = 0; i < warps_.size(); ++i) { - if (warps_[i].active()) { - return true; - } - } - return false; + return inst_in_fetch_.valid + || inst_in_decode_.valid + || inst_in_issue_.valid + || inst_in_execute_.valid + || inst_in_writeback_.valid; } void Core::printStats() const { - std::cout << "Total steps: " << steps_ << std::endl; - for (unsigned i = 0; i < warps_.size(); ++i) { - std::cout << "=== Warp " << i << " ===" << std::endl; - warps_[i].printStats(); - } + std::cout << "Steps : " << steps_ << std::endl + << "Insts : " << insts_ << std::endl + << "Loads : " << loads_ << std::endl + << "Stores: " << stores_ << std::endl; } \ No newline at end of file diff --git a/simX/core.h b/simX/core.h index 050ca833..a1172c62 100644 --- a/simX/core.h +++ b/simX/core.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "debug.h" @@ -13,14 +14,15 @@ #include "decode.h" #include "mem.h" #include "warp.h" -#include "trace.h" +#include "pipeline.h" namespace vortex { class Core { public: - Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id = 0); - ~Core(); + Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id); + + void clear(); bool running() 
const; @@ -33,7 +35,7 @@ public: } Warp& warp(int i) { - return warps_[i]; + return *warps_.at(i); } Decoder& decoder() { @@ -42,23 +44,23 @@ public: const ArchDef& arch() const { return arch_; - } - - Word interruptEntry() const { - return interruptEntry_; } unsigned long num_insts() const { - return num_insts_; + return insts_; } unsigned long num_steps() const { return steps_; } + Word getIRegValue(int reg) const { + return warps_[0]->getIRegValue(reg); + } + Word get_csr(Addr addr, int tid, int wid); - void set_csr(Addr addr, Word value); + void set_csr(Addr addr, Word value, int tid, int wid); void barrier(int bar_id, int count, int warp_id); @@ -70,20 +72,22 @@ public: private: + void schedule(); void fetch(); void decode(); - void scheduler(); - void execute_unit(); - void load_store(); + void issue(); + void execute(); void writeback(); - - void getCacheDelays(trace_inst_t *); - void warpScheduler(); - - std::vector> iRenameTable_; - std::vector> fRenameTable_; - std::vector vRenameTable_; - std::vector stalled_warps_; + + + std::vector in_use_iregs_; + std::vector in_use_fregs_; + RegMask in_use_vregs_; + WarpMask stalled_warps_; + std::vector> warps_; + std::vector barriers_; + std::vector csrs_; + std::vector fcsrs_; Word id_; const ArchDef &arch_; @@ -91,22 +95,19 @@ private: MemoryUnit &mem_; #ifdef SM_ENABLE RAM shared_mem_; -#endif - std::vector warps_; - std::vector barriers_; - std::vector csrs_; - int schedule_w_; - uint64_t steps_; - uint64_t num_insts_; - Word interruptEntry_; - bool foundSchedule_; +#endif - trace_inst_t inst_in_fetch_; - trace_inst_t inst_in_decode_; - trace_inst_t inst_in_scheduler_; - trace_inst_t inst_in_exe_; - trace_inst_t inst_in_lsu_; - trace_inst_t inst_in_wb_; + Pipeline inst_in_schedule_; + Pipeline inst_in_fetch_; + Pipeline inst_in_decode_; + Pipeline inst_in_issue_; + Pipeline inst_in_execute_; + Pipeline inst_in_writeback_; + + uint64_t steps_; + uint64_t insts_; + uint64_t loads_; + uint64_t stores_; }; } // 
namespace vortex \ No newline at end of file diff --git a/simX/decode.cpp b/simX/decode.cpp index 723c11e6..d74cff86 100644 --- a/simX/decode.cpp +++ b/simX/decode.cpp @@ -11,47 +11,215 @@ #include "decode.h" #include "archdef.h" #include "instr.h" -#include "trace.h" using namespace vortex; struct InstTableEntry_t { - const char *opString; bool controlFlow; InstType iType; }; static const std::unordered_map sc_instTable = { - {Opcode::NOP, {"nop" , false, InstType::N_TYPE}}, - {Opcode::R_INST, {"r_type", false, InstType::R_TYPE}}, - {Opcode::L_INST, {"load" , false, InstType::I_TYPE}}, - {Opcode::I_INST, {"i_type", false, InstType::I_TYPE}}, - {Opcode::S_INST, {"store" , false, InstType::S_TYPE}}, - {Opcode::B_INST, {"branch", true , InstType::B_TYPE}}, - {Opcode::LUI_INST, {"lui" , false, InstType::U_TYPE}}, - {Opcode::AUIPC_INST, {"auipc" , false, InstType::U_TYPE}}, - {Opcode::JAL_INST, {"jal" , true , InstType::J_TYPE}}, - {Opcode::JALR_INST, {"jalr" , true , InstType::I_TYPE}}, - {Opcode::SYS_INST, {"SYS" , true , InstType::I_TYPE}}, - {Opcode::FENCE, {"fence" , true , InstType::I_TYPE}}, - {Opcode::PJ_INST, {"pred j", true , InstType::R_TYPE}}, - {Opcode::GPGPU, {"gpgpu" , false, InstType::R_TYPE}}, - {Opcode::VSET_ARITH, {"vsetvl", false, InstType::V_TYPE}}, - {Opcode::VL, {"vl" , false, InstType::V_TYPE}}, - {Opcode::VS, {"vs" , false, InstType::V_TYPE}}, - {Opcode::FL, {"fl" , false, InstType::I_TYPE }}, - {Opcode::FS, {"fs" , false, InstType::S_TYPE }}, - {Opcode::FCI, {"fci" , false, InstType::R_TYPE }}, - {Opcode::FMADD, {"fma" , false, InstType::R4_TYPE }}, - {Opcode::FMSUB, {"fms" , false, InstType::R4_TYPE }}, - {Opcode::FMNMADD, {"fmnma" , false, InstType::R4_TYPE }}, - {Opcode::FMNMSUB, {"fmnms" , false, InstType::R4_TYPE }} + {Opcode::NOP, {false, InstType::N_TYPE}}, + {Opcode::R_INST, {false, InstType::R_TYPE}}, + {Opcode::L_INST, {false, InstType::I_TYPE}}, + {Opcode::I_INST, {false, InstType::I_TYPE}}, + {Opcode::S_INST, {false, 
InstType::S_TYPE}}, + {Opcode::B_INST, {true , InstType::B_TYPE}}, + {Opcode::LUI_INST, {false, InstType::U_TYPE}}, + {Opcode::AUIPC_INST, {false, InstType::U_TYPE}}, + {Opcode::JAL_INST, {true , InstType::J_TYPE}}, + {Opcode::JALR_INST, {true , InstType::I_TYPE}}, + {Opcode::SYS_INST, {true , InstType::I_TYPE}}, + {Opcode::FENCE, {true , InstType::I_TYPE}}, + {Opcode::FL, {false, InstType::I_TYPE}}, + {Opcode::FS, {false, InstType::S_TYPE}}, + {Opcode::FCI, {false, InstType::R_TYPE}}, + {Opcode::FMADD, {false, InstType::R4_TYPE}}, + {Opcode::FMSUB, {false, InstType::R4_TYPE}}, + {Opcode::FMNMADD, {false, InstType::R4_TYPE}}, + {Opcode::FMNMSUB, {false, InstType::R4_TYPE}}, + {Opcode::VSET, {false, InstType::V_TYPE}}, + {Opcode::GPGPU, {false, InstType::R_TYPE}}, }; -std::ostream &vortex::operator<<(std::ostream &os, Instr &instr) { - os << std::dec << sc_instTable.at(instr.opcode_).opString; +static const char* op_string(const Instr &instr) { + Word func3 = instr.getFunc3(); + Word func7 = instr.getFunc7(); + Word rs2 = instr.getRSrc(1); + Word imm = instr.getImm(); + switch (instr.getOpcode()) { + case Opcode::NOP: return "NOP"; + case Opcode::LUI_INST: return "LUI"; + case Opcode::AUIPC_INST: return "AUIPC"; + case Opcode::R_INST: + if (func7 & 0x1) { + switch (func3) { + case 0: return "MUL"; + case 1: return "MULH"; + case 2: return "MULHSU"; + case 3: return "MULHU"; + case 4: return "DIV"; + case 5: return "DIVU"; + case 6: return "REM"; + case 7: return "REMU"; + } + } else { + switch (func3) { + case 0: return func7 ? "SUB" : "ADD"; + case 1: return "SLL"; + case 2: return "SLT"; + case 3: return "SLTU"; + case 4: return "XOR"; + case 5: return func7 ? "SRA" : "SRL"; + case 6: return "OR"; + case 7: return "AND"; + } + } + case Opcode::I_INST: + switch (func3) { + case 0: return "ADDI"; /* funct7 overlaps the 12-bit immediate for ADDI, and RISC-V has no SUBI */ + case 1: return "SLLI"; + case 2: return "SLTI"; + case 3: return "SLTIU"; + case 4: return "XORI"; + case 5: return func7 ?
"SRAI" : "SRLI"; + case 6: return "ORI"; + case 7: return "ANDI"; + } + case Opcode::B_INST: + switch (func3) { + case 0: return "BEQ"; + case 1: return "BNE"; + case 4: return "BLT"; + case 5: return "BGE"; + case 6: return "BLTU"; + case 7: return "BGEU"; + default: + std::abort(); + } + case Opcode::JAL_INST: return "JAL"; + case Opcode::JALR_INST: return "JALR"; + case Opcode::L_INST: + switch (func3) { + case 0: return "LB"; + case 1: return "LH"; + case 2: return "LW"; + case 4: return "LBU"; + case 5: return "LHU"; + default: + std::abort(); + } + case Opcode::S_INST: + switch (func3) { + case 0: return "SB"; + case 1: return "SH"; + case 2: return "SW"; + default: + std::abort(); + } + case Opcode::SYS_INST: + switch (func3) { + case 0: return imm ? "EBREAK" : "ECALL"; + case 1: return "CSRRW"; + case 2: return "CSRRS"; + case 3: return "CSRRC"; + case 5: return "CSRRWI"; + case 6: return "CSRRSI"; + case 7: return "CSRRCI"; + default: + std::abort(); + } + case Opcode::FENCE: return "FENCE"; + case Opcode::FL: return (func3 == 0x2) ? "FL" : "VL"; + case Opcode::FS: return (func3 == 0x2) ? "FS" : "VS"; + case Opcode::FCI: + switch (func7) { + case 0x00: return "FADD"; + case 0x04: return "FSUB"; + case 0x08: return "FMUL"; + case 0x0c: return "FDIV"; + case 0x2c: return "FSQRT"; + case 0x10: + switch (func3) { + case 0: return "FSGNJ"; + case 1: return "FSGNJN"; + case 2: return "FSGNJX"; + default: + std::abort(); + } + case 0x14: + switch (func3) { + case 0: return "FMIN"; + case 1: return "FMAX"; + default: + std::abort(); + } + case 0x50: + switch (func3) { + case 0: return "FLE"; + case 1: return "FLT"; + case 2: return "FEQ"; + default: + std::abort(); + } + case 0x60: return rs2 ? "FCVT.WU.S" : "FCVT.W.S"; + case 0x68: return rs2 ? "FCVT.S.WU" : "FCVT.S.W"; + case 0x70: return func3 ?
"FCLASS" : "FMV.X.W"; + case 0x78: return "FMV.W.X"; + default: + std::abort(); + } + case Opcode::FMADD: return "FMADD"; + case Opcode::FMSUB: return "FMSUB"; + case Opcode::FMNMADD: return "FMNMADD"; + case Opcode::FMNMSUB: return "FMNMSUB"; + case Opcode::VSET: return "VSET"; + case Opcode::GPGPU: + switch (func3) { + case 0: return "TMC"; + case 1: return "WSPAWN"; + case 2: return "SPLIT"; + case 3: return "JOIN"; + case 4: return "BAR"; + default: + std::abort(); + } + default: + std::abort(); + } +} + +namespace vortex { +std::ostream &operator<<(std::ostream &os, const Instr &instr) { + os << op_string(instr) << ": "; + int rdt = instr.getRDType(); + int rd = instr.getRDest(); + switch (rdt) { + case 1: os << "r" << std::dec << rd << " <- "; break; + case 2: os << "fr" << std::dec << rd << " <- "; break; + case 3: os << "vr" << std::dec << rd << " <- "; break; + default: break; + } + int i = 0; + for (; i < instr.getNRSrc(); ++i) { + int rst = instr.getRSType(i); + int rs = instr.getRSrc(i); + if (i) os << ", "; + switch (rst) { + case 1: os << "r" << std::dec << rs; break; + case 2: os << "fr" << std::dec << rs; break; + case 3: os << "vr" << std::dec << rs; break; + default: break; + } + } + if (instr.hasImm()) { + if (i) os << ", "; + os << "imm=0x" << std::hex << instr.getImm() << std::dec; /* restore decimal so the hex flag does not stick to the caller's stream */ + } return os; } +} Decoder::Decoder(const ArchDef &arch) { inst_s_ = arch.wsize() * 8; @@ -63,22 +231,16 @@ Decoder::Decoder(const ArchDef &arch) { vmask_s_ = 1; shift_opcode_ = 0; - shift_rd_ = opcode_s_; - shift_func3_ = opcode_s_ + reg_s_; - shift_rs1_ = opcode_s_ + reg_s_ + func3_s_; - shift_rs2_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_; - shift_func7_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_; - shift_func2_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_; - shift_rs3_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + func2_s_; - shift_j_u_immed_ = opcode_s_ + reg_s_; - shift_s_b_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_; - shift_i_immed_ =
opcode_s_ + reg_s_ + func3_s_ + reg_s_; - shift_vset_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_; - shift_vmask_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_; - shift_vmop_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + vmask_s_; - shift_vnf_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + vmask_s_ + mop_s_; - shift_func6_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + 1; - shift_vset_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + 6; + shift_rd_ = opcode_s_; + shift_func3_ = shift_rd_ + reg_s_; + shift_rs1_ = shift_func3_ + func3_s_; + shift_rs2_ = shift_rs1_ + reg_s_; + shift_func7_ = shift_rs2_ + reg_s_; + shift_rs3_ = shift_func7_ + func2_s_; + shift_vmop_ = shift_func7_ + vmask_s_; + shift_vnf_ = shift_vmop_ + mop_s_; + shift_func6_ = shift_func7_ + 1; + shift_vset_ = shift_func7_ + 6; reg_mask_ = 0x1f; func2_mask_ = 0x2; @@ -94,223 +256,174 @@ Decoder::Decoder(const ArchDef &arch) { v_imm_mask_ = 0x7ff; } -std::shared_ptr Decoder::decode( - const std::vector &v, - Size &idx, - trace_inst_t *trace_inst) -{ - Word code(readWord(v, idx, inst_s_ / 8)); - - // std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n"; +std::shared_ptr Decoder::decode(Word code) { auto instr = std::make_shared(); - Opcode op = (Opcode)((code >> shift_opcode_) & opcode_mask_); - // std::cout << "opcode: " << op << "\n"; instr->setOpcode(op); - Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5, - bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3; + Word func3 = (code >> shift_func3_) & func3_mask_; + Word func6 = (code >> shift_func6_) & func6_mask_; + Word func7 = (code >> shift_func7_) & func7_mask_; - InstType curInstType = sc_instTable.at(op).iType; + int rd = (code >> shift_rd_) & reg_mask_; + int rs1 = (code >> shift_rs1_) & reg_mask_; + int rs2 = (code >> shift_rs2_) & reg_mask_; + int rs3 = (code >> shift_rs3_) & reg_mask_; + + auto iType = sc_instTable.at(op).iType; if (op == Opcode::FL || op == Opcode::FS) { - 
// need to find out whether it is vector or floating point inst - Word width_bits = (code >> shift_func3_) & func3_mask_; - if ((width_bits == 0x1) || (width_bits == 0x2) - || (width_bits == 0x3) || (width_bits == 0x4)) { - curInstType = (op == Opcode::FL) ? InstType::I_TYPE : InstType::S_TYPE; + if (func3 != 0x2) { + iType = InstType::V_TYPE; } } - switch (curInstType) { + switch (iType) { case InstType::N_TYPE: break; case InstType::R_TYPE: - if (op == Opcode::FCI) { - instr->setDestFReg((code >> shift_rd_) & reg_mask_); - instr->setSrcFReg((code >> shift_rs1_) & reg_mask_); - instr->setSrcFReg((code >> shift_rs2_) & reg_mask_); + if (op == Opcode::FCI) { + instr->setSrcFReg(rs1); + instr->setSrcFReg(rs2); + switch (func7) { + case 0x50: // FLE, FLT, FEQ + case 0x60: // FCVT.WU, FCVT.W + case 0x70: // FLASS, FMV.X.W + instr->setDestReg(rd); + break; + default: + instr->setDestFReg(rd); + } } else { - instr->setDestReg((code >> shift_rd_) & reg_mask_); - instr->setSrcReg((code >> shift_rs1_) & reg_mask_); - instr->setSrcReg((code >> shift_rs2_) & reg_mask_); + instr->setDestReg(rd); + instr->setSrcReg(rs1); + instr->setSrcReg(rs2); } - instr->setFunc3((code >> shift_func3_) & func3_mask_); - instr->setFunc7((code >> shift_func7_) & func7_mask_); - break; - - case InstType::I_TYPE: - if (op == Opcode::FCI || op == Opcode::FL) { - instr->setDestFReg((code >> shift_rd_) & reg_mask_); - instr->setSrcFReg((code >> shift_rs1_) & reg_mask_); - } else { - instr->setDestReg((code >> shift_rd_) & reg_mask_); - instr->setSrcReg((code >> shift_rs1_) & reg_mask_); - } - instr->setFunc7((code >> shift_func7_) & func7_mask_); - func3 = (code >> shift_func3_) & func3_mask_; instr->setFunc3(func3); + instr->setFunc7(func7); + break; + + case InstType::I_TYPE: { + if (op == Opcode::FCI || op == Opcode::FL) { + instr->setDestFReg(rd); + instr->setSrcFReg(rs1); + } else { + instr->setDestReg(rd); + instr->setSrcReg(rs1); + } + instr->setFunc3(func3); + instr->setFunc7(func7); if 
((func3 == 5) && (op != L_INST) && (op != Opcode::FL)) { - instr->setSrcImm(signExt(((code >> shift_rs2_) & reg_mask_), 5, reg_mask_)); + instr->setImm(signExt(rs2, 5, reg_mask_)); } else { - instr->setSrcImm(signExt(code >> shift_i_immed_, 12, i_imm_mask_)); + instr->setImm(signExt(code >> shift_rs2_, 12, i_imm_mask_)); } - break; + } break; - case InstType::S_TYPE: + case InstType::S_TYPE: { if (op == Opcode::FS) { - instr->setSrcFReg((code >> shift_rs1_) & reg_mask_); - instr->setSrcFReg((code >> shift_rs2_) & reg_mask_); + instr->setSrcFReg(rs1); + instr->setSrcFReg(rs2); } else { - instr->setSrcReg((code >> shift_rs1_) & reg_mask_); - instr->setSrcReg((code >> shift_rs2_) & reg_mask_); + instr->setSrcReg(rs1); + instr->setSrcReg(rs2); } - instr->setFunc3((code >> shift_func3_) & func3_mask_); - dest_bits = (code >> shift_rd_) & reg_mask_; - imm_bits = (code >> shift_s_b_immed_ & func7_mask_); - imeed = (imm_bits << reg_s_) | dest_bits; - instr->setSrcImm(signExt(imeed, 12, s_imm_mask_)); - break; + instr->setFunc3(func3); + Word imeed = (func7 << reg_s_) | rd; + instr->setImm(signExt(imeed, 12, s_imm_mask_)); + } break; - case InstType::B_TYPE: - instr->setSrcReg((code >> shift_rs1_) & reg_mask_); - instr->setSrcReg((code >> shift_rs2_) & reg_mask_); - instr->setFunc3((code >> shift_func3_) & func3_mask_); - - dest_bits = (code >> shift_rd_) & reg_mask_; - imm_bits = (code >> shift_s_b_immed_ & func7_mask_); - - bit_11 = dest_bits & 0x1; - bits_4_1 = dest_bits >> 1; - bit_10_5 = imm_bits & 0x3f; - bit_12 = imm_bits >> 6; - - imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12); - instr->setSrcImm(signExt(imeed, 13, b_imm_mask_)); - break; + case InstType::B_TYPE: { + instr->setSrcReg(rs1); + instr->setSrcReg(rs2); + instr->setFunc3(func3); + Word bit_11 = rd & 0x1; + Word bits_4_1 = rd >> 1; + Word bit_10_5 = func7 & 0x3f; + Word bit_12 = func7 >> 6; + Word imeed = (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12); 
+ instr->setImm(signExt(imeed, 13, b_imm_mask_)); + } break; case InstType::U_TYPE: - instr->setDestReg((code >> shift_rd_) & reg_mask_); - instr->setSrcImm(signExt(code >> shift_j_u_immed_, 20, u_imm_mask_)); + instr->setDestReg(rd); + instr->setImm(signExt(code >> shift_func3_, 20, u_imm_mask_)); break; - case InstType::J_TYPE: - instr->setDestReg((code >> shift_rd_) & reg_mask_); - unordered = code >> shift_j_u_immed_; - bits_19_12 = unordered & 0xff; - bit_11 = (unordered >> 8) & 0x1; - bits_10_1 = (unordered >> 9) & 0x3ff; - bit_20 = (unordered >> 19) & 0x1; - imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20); + case InstType::J_TYPE: { + instr->setDestReg(rd); + Word unordered = code >> shift_func3_; + Word bits_19_12 = unordered & 0xff; + Word bit_11 = (unordered >> 8) & 0x1; + Word bits_10_1 = (unordered >> 9) & 0x3ff; + Word bit_20 = (unordered >> 19) & 0x1; + Word imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20); if (bit_20) { imeed |= ~j_imm_mask_; } - instr->setSrcImm(imeed); - break; + instr->setImm(imeed); + } break; case InstType::V_TYPE: - D(3, "Entered here: instr type = vector" << op); switch (op) { - case Opcode::VSET_ARITH: //TODO: arithmetic ops - instr->setDestVReg((code >> shift_rd_) & reg_mask_); - instr->setSrcVReg((code >> shift_rs1_) & reg_mask_); - func3 = (code >> shift_func3_) & func3_mask_; + case Opcode::VSET: { + instr->setDestVReg(rd); + instr->setSrcVReg(rs1); instr->setFunc3(func3); - D(3, "Entered here: instr type = vector"); - if (func3 == 7) { - D(3, "Entered here: imm instr"); - instr->setVsetImm(!(code >> shift_vset_)); - if (instr->getVsetImm()) { + instr->setImm(!(code >> shift_vset_)); + if (instr->getImm()) { Word immed = (code >> shift_rs2_) & v_imm_mask_; - D(3, "immed" << immed); - instr->setSrcImm(immed); //TODO + instr->setImm(immed); instr->setVlmul(immed & 0x3); - D(3, "lmul " << (immed & 0x3)); instr->setVediv((immed >> 4) & 0x3); - D(3, "ediv " 
<< ((immed >> 4) & 0x3)); instr->setVsew((immed >> 2) & 0x3); - D(3, "sew " << ((immed >> 2) & 0x3)); } else { - instr->setSrcVReg((code >> shift_rs2_) & reg_mask_); + instr->setSrcVReg(rs2); } } else { - instr->setSrcVReg((code >> shift_rs2_) & reg_mask_); - instr->setVmask((code >> shift_vmask_) & 0x1); - instr->setFunc6((code >> shift_func6_) & func6_mask_); + instr->setSrcVReg(rs2); + instr->setVmask((code >> shift_func7_) & 0x1); + instr->setFunc6(func6); } - break; + } break; case Opcode::VL: - D(3, "vector load instr"); - instr->setDestVReg((code >> shift_rd_) & reg_mask_); - instr->setSrcVReg((code >> shift_rs1_) & reg_mask_); - instr->setVlsWidth((code >> shift_func3_) & func3_mask_); - instr->setSrcVReg((code >> shift_rs2_) & reg_mask_); - instr->setVmask((code >> shift_vmask_)); + instr->setDestVReg(rd); + instr->setSrcVReg(rs1); + instr->setVlsWidth(func3); + instr->setSrcVReg(rs2); + instr->setVmask(code >> shift_func7_); instr->setVmop((code >> shift_vmop_) & func3_mask_); instr->setVnf((code >> shift_vnf_) & func3_mask_); break; case Opcode::VS: - instr->setVs3((code >> shift_rd_) & reg_mask_); - instr->setSrcVReg((code >> shift_rs1_) & reg_mask_); - instr->setVlsWidth((code >> shift_func3_) & func3_mask_); - instr->setSrcVReg((code >> shift_rs2_) & reg_mask_); - instr->setVmask((code >> shift_vmask_)); + instr->setVs3(rd); + instr->setSrcVReg(rs1); + instr->setVlsWidth(func3); + instr->setSrcVReg(rs2); + instr->setVmask(code >> shift_func7_); instr->setVmop((code >> shift_vmop_) & func3_mask_); instr->setVnf((code >> shift_vnf_) & func3_mask_); break; default: - std::cout << "Inavlid opcode.\n"; std::abort(); } break; case R4_TYPE: - // RT: add R4_TYPE decoder - instr->setDestFReg((code >> shift_rd_) & reg_mask_); - instr->setSrcFReg((code >> shift_rs1_) & reg_mask_); - instr->setSrcFReg((code >> shift_rs2_) & reg_mask_); - instr->setSrcFReg((code >> shift_rs3_) & reg_mask_); - instr->setFunc3((code >> shift_func3_) & func3_mask_); + 
instr->setDestFReg(rd); + instr->setSrcFReg(rs1); + instr->setSrcFReg(rs2); + instr->setSrcFReg(rs3); + instr->setFunc3(func3); break; default: - std::cout << "Unrecognized argument class in word decoder.\n"; std::abort(); } - if (curInstType != InstType::N_TYPE) { - trace_inst->valid = true; - if (instr->hasRDest()) { - if (instr->is_FpDest()) { - trace_inst->frd = instr->getRDest(); - } else if (instr->is_VDest()) { - trace_inst->vrd = instr->getRDest(); - } else { - trace_inst->ird = instr->getRDest(); - } - } - - for (int i = 0; i < instr->getNRSrc(); ++i) { - if (instr->is_FpSrc(i)) { - if (i == 0) trace_inst->frs1 = instr->getRSrc(i); - else if (i == 1) trace_inst->frs2 = instr->getRSrc(i); - else if (i == 2) trace_inst->frs3 = instr->getRSrc(i); - else std::abort(); - } else if (instr->is_VSrc(i)) { - if (i == 0) trace_inst->vrs1 = instr->getRSrc(i); - else if (i == 1) trace_inst->vrs2 = instr->getRSrc(i); - else std::abort(); - } else { - if (i == 0) trace_inst->irs1 = instr->getRSrc(i); - else if (i == 1) trace_inst->irs2 = instr->getRSrc(i); - else std::abort(); - } - } - } - - D(2, "Decoded instr 0x" << std::hex << code << " into: " << *instr << std::flush); + D(2, "Instr 0x" << std::hex << code << ": " << *instr << std::flush); return instr; } diff --git a/simX/decode.h b/simX/decode.h index 4dd00913..6335a494 100644 --- a/simX/decode.h +++ b/simX/decode.h @@ -2,19 +2,18 @@ #include #include -#include "util.h" namespace vortex { class ArchDef; class Instr; -class trace_inst_t; +class Pipeline; class Decoder { public: Decoder(const ArchDef &); - virtual std::shared_ptr decode(const std::vector &v, Size &n, trace_inst_t * trace_inst); + std::shared_ptr decode(Word code); private: diff --git a/simX/execute.cpp b/simX/execute.cpp index 9a369191..39e4efb2 100644 --- a/simX/execute.cpp +++ b/simX/execute.cpp @@ -4,10 +4,9 @@ #include #include #include -#include -#include #include #include +#include #include #include "util.h" #include "warp.h" @@ -22,7 +21,7 
@@ static bool checkUnanimous(unsigned p, const std::vector> &m, const ThreadMask &tm) { bool same; - unsigned i; + size_t i; for (i = 0; i < m.size(); ++i) { if (tm[i]) { same = m[i][p]; @@ -32,136 +31,20 @@ static bool checkUnanimous(unsigned p, if (i == m.size()) throw DivergentBranchException(); - //std::cout << "same: " << same << " with -> "; for (; i < m.size(); ++i) { if (tm[i]) { - //std::cout << " " << (bool(m[i][p])); if (same != (bool(m[i][p]))) { - //std::cout << " FALSE\n"; return false; } } } - //std::cout << " TRUE\n"; return true; } -// Convert 32-bit integer register file to IEEE-754 floating point number. -float intregToFloat(uint32_t input) { - // 31th bit - bool sign = input & 0x80000000; - // Exponent: 23th ~ 30th bits -> 8 bits in total - int32_t exp = ((input & 0x7F800000)>>23); - // printf("exp = %u\n", exp); - // 0th ~ 22th bits -> 23 bits fraction - uint32_t frac = input & 0x007FFFFF; - // Frac_value= 1 + sum{i = 1}{23}{b_{23-i}*2^{-i}} - double frac_value; - if (exp == 0) { // subnormal - if (frac == 0) { - // zero - if (sign) - return -0.0; - else - return 0.0; - } - frac_value = 0.0; - } else - frac_value = 1.0; - - for (int i = 0; i < 23; i++) { - int bi = frac & 0x1; - frac_value += static_cast(bi * pow(2.0, i-23)); - frac = (frac >> 1); - } - - return (float)((static_cast(pow(-1.0, sign))) * (static_cast(pow(2.0, exp - 127.0)))* frac_value); -} - -// Convert a floating point number to IEEE-754 32-bit representation, -// so that it could be stored in a 32-bit integer register file -// Reference: https://www.wikihow.com/Convert-a-Number-from-Decimal-to-IEEE-754-Floating-Point-Representation - // https://www.technical-recipes.com/2012/converting-between-binary-and-decimal-representations-of-ieee-754-floating-point-numbers-in-c/ -uint32_t floatToBin(float in_value) { - union { - float input; // assumes sizeof(float) == sizeof(int) - int output; - } data; - - data.input = in_value; - - std::bitset bits(data.output); - std::string 
mystring = bits.to_string, std::allocator >(); - // Convert binary to uint32_t - Word result = stoul(mystring, nullptr, 2); - return result; -} - -// print out floating point exception after execution -void show_fe_exceptions(void) { - printf("exceptions raised:"); - if(fetestexcept(FE_DIVBYZERO)) printf(" FE_DIVBYZERO"); - if(fetestexcept(FE_INEXACT)) printf(" FE_INEXACT"); - if(fetestexcept(FE_INVALID)) printf(" FE_INVALID"); - if(fetestexcept(FE_OVERFLOW)) printf(" FE_OVERFLOW"); - if(fetestexcept(FE_UNDERFLOW)) printf(" FE_UNDERFLOW"); - feclearexcept(FE_ALL_EXCEPT); - printf("\n"); -} - -// https://en.wikipedia.org/wiki/Single-precision_floating-point_format -// check floating-point number in binary format is NaN -uint8_t fpBinIsNan(uint32_t din) { - bool fsign = din & 0x80000000; - uint32_t expo = (din>>23) & 0x000000FF; - uint32_t fraction = din & 0x007FFFFF; - uint32_t bit_22 = din & 0x00400000; - - if ((expo==0xFF) && (fraction!=0)) { - // if (!fsign && (fraction == 0x00400000)) - if (!fsign && (bit_22)) - return 1; // quiet NaN, return 1 - else - return 2; // signaling NaN, return 2 - } - return 0; -} - -// check floating-point number in binary format is zero -uint8_t fpBinIsZero(uint32_t din) { - bool fsign = din & 0x80000000; - uint32_t expo = (din>>23) & 0x000000FF; - uint32_t fraction = din & 0x007FFFFF; - - if ((expo==0) && (fraction==0)) { - if (fsign) - return 1; // negative 0 - else - return 2; // positive 0 - } - return 0; // not zero -} - -// check floating-point number in binary format is infinity -uint8_t fpBinIsInf(uint32_t din) { - bool fsign = din & 0x80000000; - uint32_t expo = (din>>23) & 0x000000FF; - uint32_t fraction = din & 0x007FFFFF; - - if ((expo==0xFF) && (fraction==0)) { - if (fsign) - return 1; // negative infinity - else - return 2; // positive infinity - } - return 0; // not infinity -} - -void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { +void Warp::execute(const Instr &instr, Pipeline *pipeline) { 
assert(tmask_.any()); - Word nextPC = PC_; - bool updatePC = false; + Word nextPC = PC_ + core_->arch().wsize(); bool runOnce = false; Word func3 = instr.getFunc3(); @@ -174,7 +57,7 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { int rsrc1 = instr.getRSrc(1); int rsrc2 = instr.getRSrc(2); Word immsrc= instr.getImm(); - bool vmask = instr.getVmask(); + Word vmask = instr.getVmask(); int num_threads = core_->arch().num_threads(); for (int t = 0; t < num_threads; t++) { @@ -184,423 +67,369 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { auto &iregs = iRegFile_.at(t); auto &fregs = fRegFile_.at(t); - ++insts_; + Word rsdata[3]; + Word iresult; + Word fresult; + int num_rsrcs = instr.getNRSrc(); + if (num_rsrcs) { + DPH(3, "[" << std::dec << t << "] Src Registers: "); + for (int i = 0; i < num_rsrcs; ++i) { + int rst = instr.getRSType(i); + int rs = instr.getRSrc(i); + if (i) DPN(3, ", "); + switch (rst) { + case 1: + rsdata[i] = iregs[rs]; + DPN(3, "r" << std::dec << rs << "=0x" << std::hex << rsdata[i]); + break; + case 2: + rsdata[i] = fregs[rs]; + DPN(3, "fr" << std::dec << rs << "=0x" << std::hex << rsdata[i]); + break; + default: break; + } + } + DPN(3, std::endl); + } + switch (opcode) { case NOP: - //std::cout << "NOP_INST\n"; + break; + case LUI_INST: + iresult = (immsrc << 12) & 0xfffff000; + break; + case AUIPC_INST: + iresult = ((immsrc << 12) & 0xfffff000) + PC_; break; case R_INST: { - // std::cout << "R_INST\n"; - Word is_mul_ext = func7 & 0x1; - if (is_mul_ext) { - // std::cout << "FOUND A MUL/DIV\n"; + if (func7 & 0x1) { switch (func3) { case 0: // MUL - D(3, "MUL: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (rdest) iregs[rdest] = ((int)iregs[rsrc0]) * ((int)iregs[rsrc1]); + iresult = ((WordI)iregs[rsrc0]) * ((WordI)iregs[rsrc1]); break; - case 1: + case 1: { // MULH - D(3, "MULH: r" << std::dec << 
rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - { - int64_t first = (int64_t)iregs[rsrc0]; - if (iregs[rsrc0] & 0x80000000) { - first = first | 0xFFFFFFFF00000000; - } - int64_t second = (int64_t)iregs[rsrc1]; - if (iregs[rsrc1] & 0x80000000) { - second = second | 0xFFFFFFFF00000000; - } - // cout << "mulh: " << std::dec << first << " * " << second; - uint64_t result = first * second; - if (rdest) iregs[rdest] = (result >> 32) & 0xFFFFFFFF; - // cout << " = " << result << " or " << iregs[rdest] << "\n"; + int64_t first = (int64_t)iregs[rsrc0]; + if (iregs[rsrc0] & 0x80000000) { + first = first | 0xFFFFFFFF00000000; } - break; - case 2: - // MULHSU - D(3, "MULHSU: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - { - int64_t first = (int64_t)iregs[rsrc0]; - if (iregs[rsrc0] & 0x80000000) { - first = first | 0xFFFFFFFF00000000; - } - int64_t second = (int64_t)iregs[rsrc1]; - if (rdest) iregs[rdest] = ((first * second) >> 32) & 0xFFFFFFFF; + int64_t second = (int64_t)iregs[rsrc1]; + if (iregs[rsrc1] & 0x80000000) { + second = second | 0xFFFFFFFF00000000; } - break; - case 3: + uint64_t result = first * second; + iresult = (result >> 32) & 0xFFFFFFFF; + } break; + case 2: { + // MULHSU + int64_t first = (int64_t)iregs[rsrc0]; + if (iregs[rsrc0] & 0x80000000) { + first = first | 0xFFFFFFFF00000000; + } + int64_t second = (int64_t)iregs[rsrc1]; + iresult = ((first * second) >> 32) & 0xFFFFFFFF; + } break; + case 3: { // MULHU - D(3, "MULHU: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - { - uint64_t first = (uint64_t)iregs[rsrc0]; - uint64_t second = (uint64_t)iregs[rsrc1]; - // cout << "MULHU\n"; - if (rdest) iregs[rdest] = ((first * second) >> 
32) & 0xFFFFFFFF; - } - break; - case 4: + uint64_t first = (uint64_t)iregs[rsrc0]; + uint64_t second = (uint64_t)iregs[rsrc1]; + iresult = ((first * second) >> 32) & 0xFFFFFFFF; + } break; + case 4: { // DIV - D(3, "DIV: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (iregs[rsrc1] == 0) { - if (rdest) iregs[rdest] = -1; - break; + WordI dividen = iregs[rsrc0]; + WordI divisor = iregs[rsrc1]; + if (divisor == 0) { + iresult = -1; + } else if (dividen == WordI(0x80000000) && divisor == WordI(0xffffffff)) { + iresult = dividen; + } else { + iresult = dividen / divisor; } - // cout << "dividing: " << std::dec << ((int) iregs[rsrc0]) << " / " << ((int) iregs[rsrc1]); - if (rdest) iregs[rdest] = ((int)iregs[rsrc0]) / ((int)iregs[rsrc1]); - // cout << " = " << ((int) iregs[rdest]) << "\n"; - break; - case 5: + } break; + case 5: { // DIVU - D(3, "DIVU: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (iregs[rsrc1] == 0) { - if (rdest) iregs[rdest] = -1; - break; + Word dividen = iregs[rsrc0]; + Word divisor = iregs[rsrc1]; + if (divisor == 0) { + iresult = -1; + } else { + iresult = dividen / divisor; } - if (rdest) iregs[rdest] = ((uint32_t)iregs[rsrc0]) / ((uint32_t)iregs[rsrc1]); - break; - case 6: + } break; + case 6: { // REM - D(3, "REM: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); + WordI dividen = iregs[rsrc0]; + WordI divisor = iregs[rsrc1]; if (iregs[rsrc1] == 0) { - if (rdest) iregs[rdest] = iregs[rsrc0]; - break; + iresult = dividen; + } else if (dividen == WordI(0x80000000) && divisor == WordI(0xffffffff)) { + iresult = 0; + } else { + iresult = dividen % divisor; } - if (rdest) iregs[rdest] = 
((int)iregs[rsrc0]) % ((int)iregs[rsrc1]); - break; - case 7: + } break; + case 7: { // REMU - D(3, "REMU: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); + Word dividen = iregs[rsrc0]; + Word divisor = iregs[rsrc1]; if (iregs[rsrc1] == 0) { - if (rdest) iregs[rdest] = iregs[rsrc0]; - break; + iresult = dividen; + } else { + iresult = dividen % divisor; } - if (rdest) iregs[rdest] = ((uint32_t)iregs[rsrc0]) % ((uint32_t)iregs[rsrc1]); - break; + } break; default: std::cout << "unsupported MUL/DIV instr\n"; std::abort(); } } else { - // std::cout << "NORMAL R-TYPE\n"; switch (func3) { case 0: if (func7) { - D(3, "SUBI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (rdest) iregs[rdest] = iregs[rsrc0] - iregs[rsrc1]; + iresult = iregs[rsrc0] - iregs[rsrc1]; } else { - D(3, "ADDI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (rdest) iregs[rdest] = iregs[rsrc0] + iregs[rsrc1]; + iresult = iregs[rsrc0] + iregs[rsrc1]; } break; case 1: - D(3, "SLLI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (rdest) iregs[rdest] = iregs[rsrc0] << iregs[rsrc1]; + iresult = iregs[rsrc0] << iregs[rsrc1]; break; case 2: - D(3, "SLTI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (int(iregs[rsrc0]) < int(iregs[rsrc1])) { - if (rdest) iregs[rdest] = 1; + if (WordI(iregs[rsrc0]) < WordI(iregs[rsrc1])) { + iresult = 1; } else { - if (rdest) iregs[rdest] = 0; + iresult = 0; } break; case 3: - D(3, "SLTU: r" 
<< std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); if (Word(iregs[rsrc0]) < Word(iregs[rsrc1])) { - if (rdest) iregs[rdest] = 1; + iresult = 1; } else { - if (rdest) iregs[rdest] = 0; + iresult = 0; } break; case 4: - D(3, "XORI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (rdest) iregs[rdest] = iregs[rsrc0] ^ iregs[rsrc1]; + iresult = iregs[rsrc0] ^ iregs[rsrc1]; break; case 5: if (func7) { - D(3, "SRLI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (rdest) iregs[rdest] = int(iregs[rsrc0]) >> int(iregs[rsrc1]); + iresult = WordI(iregs[rsrc0]) >> WordI(iregs[rsrc1]); } else { - D(3, "SRLU: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (rdest) iregs[rdest] = Word(iregs[rsrc0]) >> Word(iregs[rsrc1]); + iresult = Word(iregs[rsrc0]) >> Word(iregs[rsrc1]); } break; case 6: - D(3, "ORI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (rdest) iregs[rdest] = iregs[rsrc0] | iregs[rsrc1]; + iresult = iregs[rsrc0] | iregs[rsrc1]; break; case 7: - D(3, "AND: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (rdest) iregs[rdest] = iregs[rsrc0] & iregs[rsrc1]; + iresult = iregs[rsrc0] & iregs[rsrc1]; break; default: - std::cout << "ERROR: UNSUPPORTED R INST\n"; std::abort(); } } } break; case I_INST: - //std::cout << "I_INST\n"; switch (func3) { case 0: // ADDI - D(3, "ADDI: r" << 
std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=" << immsrc); - if (rdest) iregs[rdest] = iregs[rsrc0] + immsrc; + iresult = iregs[rsrc0] + immsrc; + break; + case 1: + // SLLI + iresult = iregs[rsrc0] << immsrc; break; case 2: // SLTI - D(3, "SLTI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=" << immsrc); - if (int(iregs[rsrc0]) < int(immsrc)) { - if (rdest) iregs[rdest] = 1; + if (WordI(iregs[rsrc0]) < WordI(immsrc)) { + iresult = 1; } else { - if (rdest) iregs[rdest] = 0; + iresult = 0; } break; case 3: { // SLTIU - D(3, "SLTIU: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=" << immsrc); - if (unsigned(iregs[rsrc0]) < unsigned(immsrc)) { - if (rdest) iregs[rdest] = 1; + if (Word(iregs[rsrc0]) < Word(immsrc)) { + iresult = 1; } else { - if (rdest) iregs[rdest] = 0; + iresult = 0; } } break; case 4: // XORI - D(3, "XORI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = iregs[rsrc0] ^ immsrc; + iresult = iregs[rsrc0] ^ immsrc; + break; + case 5: + if (func7) { + // SRAI + Word result = WordI(iregs[rsrc0]) >> immsrc; + iresult = result; + } else { + // SRLI + Word result = Word(iregs[rsrc0]) >> immsrc; + iresult = result; + } break; case 6: // ORI - D(3, "ORI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = iregs[rsrc0] | immsrc; + iresult = iregs[rsrc0] | immsrc; break; case 7: // ANDI - D(3, "ANDI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = iregs[rsrc0] & immsrc; - break; - case 1: - // SLLI - D(3, "SLLI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << 
std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = iregs[rsrc0] << immsrc; - break; - case 5: - if ((func7 == 0)) { - // SRLI - D(3, "SRLI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=" << immsrc); - Word result = Word(iregs[rsrc0]) >> Word(immsrc); - if (rdest) iregs[rdest] = result; - } else { - // SRAI - D(3, "SRAI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=" << immsrc); - Word op1 = iregs[rsrc0]; - Word op2 = immsrc; - if (rdest) iregs[rdest] = op1 >> op2; - } + iresult = iregs[rsrc0] & immsrc; break; default: - std::cout << "ERROR: UNSUPPORTED L INST\n"; std::abort(); } break; - case L_INST: { - ++loads_; - Word memAddr = ((iregs[rsrc0] + immsrc) & 0xFFFFFFFC); - Word shift_by = ((iregs[rsrc0] + immsrc) & 0x00000003) * 8; - Word data_read = core_->dcache_read(memAddr, 0); - trace_inst->is_lw = true; - trace_inst->mem_addresses[t] = memAddr; - switch (func3) { - case 0: - // LBI - D(3, "LBI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = signExt((data_read >> shift_by) & 0xFF, 8, 0xFF); - break; - case 1: - // LWI - D(3, "LHI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = signExt((data_read >> shift_by) & 0xFFFF, 16, 0xFFFF); - break; - case 2: - // LDI - D(3, "LWI: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = int(data_read & 0xFFFFFFFF); - break; - case 4: - // LBU - D(3, "LBU: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = unsigned((data_read >> shift_by) & 
0xFF); - break; - case 5: - // LWU - D(3, "LHU: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = unsigned((data_read >> shift_by) & 0xFFFF); - break; - default: - std::cout << "ERROR: UNSUPPORTED L INST\n"; - std::abort(); - } - D(3, "LOAD MEM ADDRESS: " << std::hex << memAddr); - } break; - case S_INST: { - ++stores_; - Word memAddr = iregs[rsrc0] + immsrc; - trace_inst->is_sw = true; - trace_inst->mem_addresses[t] = memAddr; - switch (func3) { - case 0: - // SB - D(3, "SB: r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1] << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - core_->dcache_write(memAddr, iregs[rsrc1] & 0x000000FF, 0, 1); - break; - case 1: - // SH - D(3, "SH: r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1] << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - core_->dcache_write(memAddr, iregs[rsrc1], 0, 2); - break; - case 2: - // SW - D(3, "SW: r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1] << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - core_->dcache_write(memAddr, iregs[rsrc1], 0, 4); - break; - default: - std::cout << "ERROR: UNSUPPORTED S INST\n"; - std::abort(); - } - D(3, "STORE MEM ADDRESS: " << std::hex << memAddr); - } break; case B_INST: - trace_inst->stall_warp = true; switch (func3) { case 0: // BEQ - D(3, "BEQ: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1] << ", imm=0x" << std::hex << immsrc); - if (int(iregs[rsrc0]) == int(iregs[rsrc1])) { - if (!updatePC) - nextPC = (PC_ - 4) + immsrc; - updatePC = true; + if (iregs[rsrc0] == iregs[rsrc1]) { + nextPC = PC_ + immsrc; } break; case 1: // BNE - D(3, "BNE: r" << std::dec << rsrc0 << "=0x" << 
std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1] << ", imm=0x" << std::hex << immsrc); - if (int(iregs[rsrc0]) != int(iregs[rsrc1])) { - if (!updatePC) - nextPC = (PC_ - 4) + immsrc; - updatePC = true; + if (iregs[rsrc0] != iregs[rsrc1]) { + nextPC = PC_ + immsrc; } break; case 4: // BLT - D(3, "BLT: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1] << ", imm=0x" << std::hex << immsrc); - if (int(iregs[rsrc0]) < int(iregs[rsrc1])) { - if (!updatePC) - nextPC = (PC_ - 4) + immsrc; - updatePC = true; + if (WordI(iregs[rsrc0]) < WordI(iregs[rsrc1])) { + nextPC = PC_ + immsrc; } break; case 5: // BGE - D(3, "BGE: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1] << ", imm=0x" << std::hex << immsrc); - if (int(iregs[rsrc0]) >= int(iregs[rsrc1])) { - if (!updatePC) - nextPC = (PC_ - 4) + immsrc; - updatePC = true; + if (WordI(iregs[rsrc0]) >= WordI(iregs[rsrc1])) { + nextPC = PC_ + immsrc; } break; case 6: // BLTU - D(3, "BLTU: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1] << ", imm=0x" << std::hex << immsrc); if (Word(iregs[rsrc0]) < Word(iregs[rsrc1])) { - if (!updatePC) - nextPC = (PC_ - 4) + immsrc; - updatePC = true; + nextPC = PC_ + immsrc; } break; case 7: // BGEU - D(3, "BGEU: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1] << ", imm=0x" << std::hex << immsrc); if (Word(iregs[rsrc0]) >= Word(iregs[rsrc1])) { - if (!updatePC) - nextPC = (PC_ - 4) + immsrc; - updatePC = true; + nextPC = PC_ + immsrc; } break; } - break; - case LUI_INST: - D(3, "LUI: r" << std::dec << rdest << " <- imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = (immsrc << 12) & 0xfffff000; - break; - case AUIPC_INST: - D(3, "AUIPC: 
r" << std::dec << rdest << " <- imm=0x" << std::hex << immsrc); - if (rdest) iregs[rdest] = ((immsrc << 12) & 0xfffff000) + (PC_ - 4); + pipeline->stall_warp = true; + runOnce = true; break; case JAL_INST: - D(3, "JAL: r" << std::dec << rdest << " <- imm=0x" << std::hex << immsrc); - trace_inst->stall_warp = true; - if (!updatePC) { - nextPC = (PC_ - 4) + immsrc; - //std::cout << "JAL... SETTING PC: " << nextPC << "\n"; - } - if (rdest) iregs[rdest] = PC_; - updatePC = true; + iresult = nextPC; + nextPC = PC_ + immsrc; + pipeline->stall_warp = true; + runOnce = true; break; case JALR_INST: - D(3, "JALR: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - trace_inst->stall_warp = true; - if (!updatePC) { - nextPC = iregs[rsrc0] + immsrc; - //std::cout << "JALR... SETTING PC: " << nextPC << "\n"; - } - if (rdest) iregs[rdest] = PC_; - updatePC = true; + iresult = nextPC; + nextPC = iregs[rsrc0] + immsrc; + pipeline->stall_warp = true; + runOnce = true; break; + case L_INST: { + Word memAddr = ((iregs[rsrc0] + immsrc) & 0xFFFFFFFC); // Address word alignment + Word shift_by = ((iregs[rsrc0] + immsrc) & 0x00000003) * 8; + Word data_read = core_->dcache_read(memAddr, 0); + D(3, "LOAD MEM ADDRESS: " << std::hex << memAddr << ", DATA=0x" << data_read); + switch (func3) { + case 0: + // LBI + iresult = signExt((data_read >> shift_by) & 0xFF, 8, 0xFF); + break; + case 1: + // LHI + iresult = signExt((data_read >> shift_by) & 0xFFFF, 16, 0xFFFF); + break; + case 2: + // LW + iresult = data_read; + break; + case 4: + // LBU + iresult = Word((data_read >> shift_by) & 0xFF); + break; + case 5: + // LHU + iresult = Word((data_read >> shift_by) & 0xFFFF); + break; + default: + std::abort(); + } + } break; + case S_INST: { + Word memAddr = iregs[rsrc0] + immsrc; + switch (func3) { + case 0: + // SB + core_->dcache_write(memAddr, iregs[rsrc1] & 0x000000FF, 0, 1); + break; + case 1: + // SH + 
core_->dcache_write(memAddr, iregs[rsrc1], 0, 2); + break; + case 2: + // SW + core_->dcache_write(memAddr, iregs[rsrc1], 0, 4); + break; + default: + std::abort(); + } + D(3, "STORE MEM ADDRESS: " << std::hex << memAddr); + } break; case SYS_INST: { - D(3, "SYS_INST: r" << std::dec << rdest << " <- r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", imm=0x" << std::hex << immsrc); - Word rs1 = iregs[rsrc0]; Word csr_addr = immsrc & 0x00000FFF; + Word csr_value = core_->get_csr(csr_addr, t, id_); switch (func3) { case 0: if (csr_addr < 2) { // ECALL/EBREAK tmask_.reset(); active_ = tmask_.any(); + pipeline->stall_warp = true; } break; case 1: // CSRRW - if (rdest) iregs[rdest] = core_->get_csr(csr_addr, t, id_); - core_->set_csr(csr_addr, rs1); + iresult = csr_value; + core_->set_csr(csr_addr, iregs[rsrc0], t, id_); break; case 2: // CSRRS - if (rdest) iregs[rdest] = core_->get_csr(csr_addr, t, id_); - core_->set_csr(csr_addr, rs1 | core_->get_csr(csr_addr, t, id_)); + iresult = csr_value; + core_->set_csr(csr_addr, csr_value | iregs[rsrc0], t, id_); break; case 3: // CSRRC - if (rdest) iregs[rdest] = core_->get_csr(csr_addr, t, id_); - core_->set_csr(csr_addr, rs1 & ~core_->get_csr(csr_addr, t, id_)); + iresult = csr_value; + core_->set_csr(csr_addr, csr_value & ~iregs[rsrc0], t, id_); break; case 5: // CSRRWI - if (rdest) iregs[rdest] = core_->get_csr(csr_addr, t, id_); - core_->set_csr(csr_addr, rsrc0); + iresult = csr_value; + core_->set_csr(csr_addr, rsrc0, t, id_); break; case 6: // CSRRSI - if (rdest) iregs[rdest] = core_->get_csr(csr_addr, t, id_); - core_->set_csr(csr_addr, rsrc0 | core_->get_csr(csr_addr, t, id_)); + iresult = csr_value; + core_->set_csr(csr_addr, csr_value | rsrc0, t, id_); break; case 7: // CSRRCI - if (rdest) iregs[rdest] = core_->get_csr(csr_addr, t, id_); - core_->set_csr(csr_addr, rsrc0 & ~core_->get_csr(csr_addr, t, id_)); + iresult = csr_value; + core_->set_csr(csr_addr, csr_value & ~rsrc0, t, id_); break; default: 
break; @@ -608,1046 +437,15 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { } break; case FENCE: D(3, "FENCE"); + pipeline->stall_warp = true; + runOnce = true; break; - case PJ_INST: - D(3, "PJ_INST: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - if (iregs[rsrc0]) { - if (!updatePC) - nextPC = iregs[rsrc1]; - updatePC = true; - } - break; - case GPGPU: - switch (func3) { - case 0: { - // TMC - D(3, "TMC: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0]); - trace_inst->stall_warp = true; - int active_threads = std::min(iregs[rsrc0], core_->arch().num_threads()); - tmask_.reset(); - for (int i = 0; i < active_threads; ++i) { - tmask_[i] = true; - } - active_ = tmask_.any(); - runOnce = true; - } break; - case 1: { - // WSPAWN - D(3, "WSPAWN: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - trace_inst->wspawn = true; - int active_warps = std::min(iregs[rsrc0], core_->arch().num_warps()); - D(0, "Spawning " << (active_warps-1) << " warps at PC: " << std::hex << iregs[rsrc1]); - - for (int i = 1; i < active_warps; ++i) { - Warp &newWarp = core_->warp(i); - newWarp.setPC(iregs[rsrc1]); - newWarp.setTmask(0, true); - } - runOnce = true; - } break; - case 2: { - // SPLIT - D(3, "SPLIT: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0]); - trace_inst->stall_warp = true; - if (checkUnanimous(rsrc0, iRegFile_, tmask_)) { - D(3, "Unanimous pred: " << rsrc0 << " val: " << iregs[rsrc0] << "\n"); - DomStackEntry e(tmask_); - e.unanimous = true; - domStack_.push(e); - break; - } - - D(3, "Split: Original TM: "); - DX( for (int i = 0; i < core_->arch().num_threads(); ++i) D(3, tmask_[i] << " "); ) - - ThreadMask tmask; - for (int i = 0; i < core_->arch().num_threads(); ++i) { - tmask[i] = tmask_[i] && !iRegFile_[i][rsrc0]; - } - - DomStackEntry e(tmask, PC_); - 
domStack_.push(tmask_); - domStack_.push(e); - for (unsigned i = 0; i < e.tmask.size(); ++i) { - tmask_[i] = !e.tmask[i] && tmask_[i]; - } - active_ = tmask_.any(); - - D(3, "Split: New TM"); - DX( for (int i = 0; i < core_->arch().num_threads(); ++i) D(3, tmask_[i] << " "); ) - D(3, "Split: Pushed TM PC: " << std::hex << e.PC << std::dec << "\n"); - DX( for (int i = 0; i < core_->arch().num_threads(); ++i) D(3, e.tmask[i] << " "); ) - - runOnce = true; - } break; - case 3: { - // JOIN - D(3, "JOIN"); - if (!domStack_.empty() && domStack_.top().unanimous) { - D(2, "Uni branch at join"); - printf("NEW DOMESTACK: \n"); - tmask_ = domStack_.top().tmask; - active_ = tmask_.any(); - domStack_.pop(); - break; - } - - if (!domStack_.top().fallThrough) { - if (!updatePC) { - nextPC = domStack_.top().PC; - D(3, "join: NOT FALLTHROUGH PC: " << std::hex << nextPC << std::dec); - } - updatePC = true; - } - - D(3, "Join: Old TM: "); - DX( for (int i = 0; i < core_->arch().num_threads(); ++i) D(3, tmask_[i] << " "); ) - std::cout << "\n"; - tmask_ = domStack_.top().tmask; - active_ = tmask_.any(); - - D(3, "Join: New TM: "); - DX( for (int i = 0; i < core_->arch().num_threads(); ++i) D(3, tmask_[i] << " "); ) - - domStack_.pop(); - runOnce = true; - } break; - case 4: { - // BAR - D(3, "BAR: r" << std::dec << rsrc0 << "=0x" << std::hex << iregs[rsrc0] << ", r" << std::dec << rsrc1 << "=0x" << std::hex << iregs[rsrc1]); - active_ = false; - core_->barrier(iregs[rsrc0], iregs[rsrc1], id_); - trace_inst->stall_warp = true; - runOnce = true; - } break; - default: - std::cout << "ERROR: UNSUPPORTED GPGPU INSTRUCTION " << instr << "\n"; - } - break; - case VSET_ARITH: { - D(3, "VSET_ARITH"); - int VLEN = core_->arch().vsize() * 8; - int VLMAX = (instr.getVlmul() * VLEN) / instr.getVsew(); - switch (func3) { - case 0: // vector-vector - switch (func6) { - case 0: { - D(4, "Addition " << rsrc0 << " " << rsrc1 << " Dest:" << rdest); - auto& vr1 = vRegFile_[rsrc0]; - auto& vr2 = 
vRegFile_[rsrc1]; - auto& vd = vRegFile_[rdest]; - auto& mask = vRegFile_[0]; - - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t emask = *(uint8_t *)(mask.data() + i); - uint8_t value = emask & 0x1; - if (vmask || (!vmask && value)) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = first + second; - D(4, "Adding " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t emask = *(uint16_t *)(mask.data() + i); - uint16_t value = emask & 0x1; - if (vmask || (!vmask && value)) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = first + second; - D(4, "Adding " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - } - } else if (vtype_.vsew == 32) { - D(4, "Doing 32 bit vector addition"); - for (int i = 0; i < vl_; i++) { - uint32_t emask = *(uint32_t *)(mask.data() + i); - uint32_t value = emask & 0x1; - if (vmask || (!vmask && value)) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = first + second; - D(4, "Adding " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - } - } - } break; - case 24: //vmseq - { - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = (first == second) ? 
1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = (first == second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = (first == second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - } - - } break; - case 25: //vmsne - { - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = (first != second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = (first != second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = (first != second) ? 
1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - } - - } break; - case 26: //vmsltu - { - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = (first < second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = (first < second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = (first < second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - } - - } break; - case 27: //vmslt - { - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - int8_t first = *(int8_t *)(vr1.data() + i); - int8_t second = *(int8_t *)(vr2.data() + i); - int8_t result = (first < second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - int16_t first = *(int16_t *)(vr1.data() + i); - int16_t second = *(int16_t *)(vr2.data() + i); - int16_t result = (first < second) ? 
1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(int16_t *)(vd.data() + i) = result; - } - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - int32_t first = *(int32_t *)(vr1.data() + i); - int32_t second = *(int32_t *)(vr2.data() + i); - int32_t result = (first < second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(int32_t *)(vd.data() + i) = result; - } - } - } break; - case 28: //vmsleu - { - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = (first <= second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = (first <= second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = (first <= second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - } - } break; - case 29: //vmsle - { - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - int8_t first = *(int8_t *)(vr1.data() + i); - int8_t second = *(int8_t *)(vr2.data() + i); - int8_t result = (first <= second) ? 
1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - int16_t first = *(int16_t *)(vr1.data() + i); - int16_t second = *(int16_t *)(vr2.data() + i); - int16_t result = (first <= second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(int16_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - int32_t first = *(int32_t *)(vr1.data() + i); - int32_t second = *(int32_t *)(vr2.data() + i); - int32_t result = (first <= second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(int32_t *)(vd.data() + i) = result; - } - } - } break; - case 30: //vmsgtu - { - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = (first > second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = (first > second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = (first > second) ? 
1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - } - } break; - case 31: //vmsgt - { - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - int8_t first = *(int8_t *)(vr1.data() + i); - int8_t second = *(int8_t *)(vr2.data() + i); - int8_t result = (first > second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - int16_t first = *(int16_t *)(vr1.data() + i); - int16_t second = *(int16_t *)(vr2.data() + i); - int16_t result = (first > second) ? 1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(int16_t *)(vd.data() + i) = result; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - int32_t first = *(int32_t *)(vr1.data() + i); - int32_t second = *(int32_t *)(vr2.data() + i); - int32_t result = (first > second) ? 
1 : 0; - D(4, "Comparing " << first << " + " << second << " = " << result); - *(int32_t *)(vd.data() + i) = result; - } - } - } break; - } - break; - case 2: { - switch (func6) { - case 24: //vmandnot - { - D(3, "vmandnot"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t first_value = (first & 0x1); - uint8_t second_value = (second & 0x1); - uint8_t result = (first_value & !second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t first_value = (first & 0x1); - uint16_t second_value = (second & 0x1); - uint16_t result = (first_value & !second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t first_value = (first & 0x1); - uint32_t second_value = (second & 0x1); - uint32_t result = (first_value & !second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 25: //vmand - { - D(3, "vmand"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - 
uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t first_value = (first & 0x1); - uint8_t second_value = (second & 0x1); - uint8_t result = (first_value & second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t first_value = (first & 0x1); - uint16_t second_value = (second & 0x1); - uint16_t result = (first_value & second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t first_value = (first & 0x1); - uint32_t second_value = (second & 0x1); - uint32_t result = (first_value & second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 26: //vmor - { - D(3, "vmor"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t first_value = (first & 0x1); - uint8_t second_value = (second & 0x1); - uint8_t result = (first_value | second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t 
*)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t first_value = (first & 0x1); - uint16_t second_value = (second & 0x1); - uint16_t result = (first_value | second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t first_value = (first & 0x1); - uint32_t second_value = (second & 0x1); - uint32_t result = (first_value | second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 27: //vmxor - { - D(3, "vmxor"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t first_value = (first & 0x1); - uint8_t second_value = (second & 0x1); - uint8_t result = (first_value ^ second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t first_value = (first & 0x1); - uint16_t second_value = (second & 0x1); - uint16_t result = (first_value ^ second_value); - D(4, "Comparing " << first << " + " << second << " = " 
<< result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t first_value = (first & 0x1); - uint32_t second_value = (second & 0x1); - uint32_t result = (first_value ^ second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 28: //vmornot - { - D(3, "vmornot"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t first_value = (first & 0x1); - uint8_t second_value = (second & 0x1); - uint8_t result = (first_value | !second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t first_value = (first & 0x1); - uint16_t second_value = (second & 0x1); - uint16_t result = (first_value | !second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t first_value = (first & 0x1); - uint32_t second_value = (second 
& 0x1); - uint32_t result = (first_value | !second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 29: //vmnand - { - D(3, "vmnand"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t first_value = (first & 0x1); - uint8_t second_value = (second & 0x1); - uint8_t result = !(first_value & second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t first_value = (first & 0x1); - uint16_t second_value = (second & 0x1); - uint16_t result = !(first_value & second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t first_value = (first & 0x1); - uint32_t second_value = (second & 0x1); - uint32_t result = !(first_value & second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 30: //vmnor - { - D(3, "vmnor"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd 
= vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t first_value = (first & 0x1); - uint8_t second_value = (second & 0x1); - uint8_t result = !(first_value | second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t first_value = (first & 0x1); - uint16_t second_value = (second & 0x1); - uint16_t result = !(first_value | second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 32) { - - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t first_value = (first & 0x1); - uint32_t second_value = (second & 0x1); - uint32_t result = !(first_value | second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 31: //vmxnor - { - D(3, "vmxnor"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t first_value = (first & 0x1); - uint8_t second_value = (second & 0x1); - uint8_t result = !(first_value ^ second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - 
*(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t first_value = (first & 0x1); - uint16_t second_value = (second & 0x1); - uint16_t result = !(first_value ^ second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t first_value = (first & 0x1); - uint32_t second_value = (second & 0x1); - uint32_t result = !(first_value ^ second_value); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 37: //vmul - { - D(3, "vmul"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = (first * second); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = (first * second); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { 
- *(uint16_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = (first * second); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 45: //vmacc - { - D(3, "vmacc"); - auto &vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = (first * second); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) += result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = (first * second); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) += result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = (first * second); - D(4, "Comparing " << first << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) += result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - } - } break; - case 6: { - switch (func6) { - case 0: { - D(3, "vmadd.vx"); - //vector & vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for 
(int i = 0; i < vl_; i++) { - //uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = (iregs[rsrc0] + second); - D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - //uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = (iregs[rsrc0] + second); - D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - //uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = (iregs[rsrc0] + second); - D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - case 37: //vmul.vx - { - D(3, "vmul.vx"); - //vector & vr1 = vRegFile_[rsrc0]; - auto &vr2 = vRegFile_[rsrc1]; - auto &vd = vRegFile_[rdest]; - if (vtype_.vsew == 8) { - for (int i = 0; i < vl_; i++) { - //uint8_t first = *(uint8_t *)(vr1.data() + i); - uint8_t second = *(uint8_t *)(vr2.data() + i); - uint8_t result = (iregs[rsrc0] * second); - D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); - *(uint8_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint8_t *)(vd.data() + i) = 0; - } - } else if (vtype_.vsew == 16) { - for (int i = 0; i < vl_; i++) { - //uint16_t first = *(uint16_t *)(vr1.data() + i); - uint16_t second = *(uint16_t *)(vr2.data() + i); - uint16_t result = (iregs[rsrc0] * second); - D(4, "Comparing " << 
iregs[rsrc0] << " + " << second << " = " << result); - *(uint16_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint16_t *)(vd.data() + i) = 0; - } - - } else if (vtype_.vsew == 32) { - for (int i = 0; i < vl_; i++) { - //uint32_t first = *(uint32_t *)(vr1.data() + i); - uint32_t second = *(uint32_t *)(vr2.data() + i); - uint32_t result = (iregs[rsrc0] * second); - D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); - *(uint32_t *)(vd.data() + i) = result; - } - for (int i = vl_; i < VLMAX; i++) { - *(uint32_t *)(vd.data() + i) = 0; - } - } - } break; - } - } break; - case 7: { - vtype_.vill = 0; //TODO - vtype_.vediv = instr.getVediv(); - vtype_.vsew = instr.getVsew(); - vtype_.vlmul = instr.getVlmul(); - - D(3, "lmul:" << vtype_.vlmul << " sew:" << vtype_.vsew << " ediv: " << vtype_.vediv << "rsrc_" << iregs[rsrc0] << "VLMAX" << VLMAX); - - int s0 = iregs[rsrc0]; - if (s0 <= VLMAX) { - vl_ = s0; - } else if (s0 < (2 * VLMAX)) { - vl_ = (int)ceil((s0 * 1.0) / 2.0); - } else if (s0 >= (2 * VLMAX)) { - vl_ = VLMAX; - } - if (rdest) iregs[rdest] = vl_; - } break; - default: { - std::abort(); - } - } - } break; case (FL | VL): - ++loads_; - if ( func3==0x2 ) { - //std::cout << "FL_INST\n"; - // rs1 is integer is register! 
- Word memAddr = ((iregs[rsrc0] + immsrc) & 0xFFFFFFFC); // alignment - D(9,"something weird happen!"); - Word data_read = core_->dcache_read(memAddr, 0); - D(3, "Memaddr"); - DPN(3, ' ' << std::setw(8) << std::hex << memAddr << std::endl); - trace_inst->is_lw = true; - trace_inst->mem_addresses[t] = memAddr; - // //std::cout < data_read: " << data_read << "\n"; - switch (func3) { - case 2: // FLW - fregs[rdest] = data_read & 0xFFFFFFFF; - D(4, "fpReg[rd] " << std::setw(8) << std::hex << fregs[rdest] << std::endl); - break; - default: - std::cout << "ERROR: UNSUPPORTED FL INST\n"; - exit(1); - } - D(3, "LOAD MEM ADDRESS: " << std::hex << memAddr); + if (func3 == 0x2) { + Word memAddr = iregs[rsrc0] + immsrc; + Word data_read = core_->dcache_read(memAddr, 0); + D(3, "LOAD MEM ADDRESS: " << std::hex << memAddr << ", DATA=0x" << data_read); + fresult = data_read; } else { D(3, "Executing vector load"); D(4, "lmul: " << vtype_.vlmul << " VLEN:" << (core_->arch().vsize() * 8) << "sew: " << vtype_.vsew); @@ -1665,13 +463,9 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { D(4, "Mem addr: " << std::hex << memAddr << " Data read " << data_read); int *result_ptr = (int *)(vd.data() + i); *result_ptr = data_read; - - trace_inst->is_lw = true; - trace_inst->mem_addresses[i] = memAddr; D(3, "STORE MEM ADDRESS: " << std::hex << memAddr); } - // cout << "Finished loop" << std::endl; } break; default: std::abort(); @@ -1680,55 +474,16 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { } break; case (FS | VS): - ++stores_; - if ((func3 == 0x1) || (func3 == 0x2) - || (func3 == 0x3) || (func3 == 0x4)) { - //std::cout << "FS_INST\n"; - // base is integer register! 
+ if (func3 == 0x2) { Word memAddr = iregs[rsrc0] + immsrc; - trace_inst->is_sw = true; - trace_inst->mem_addresses[t] = memAddr; - - switch (func3) { - case 1: - std::cout << "ERROR: UNSUPPORTED FS INST\n"; - std::cout << "FSH\n"; - exit(1); - // c.core->mem.write(memAddr, fregs[rsrc1], c.supervisorMode, 2); - break; - case 2: - // //std::cout << std::hex << "FSW: about to write: " << fregs[rsrc1] << " to " << memAddr << "\n"; - core_->dcache_write(memAddr, fregs[rsrc1], 0, 4); - break; - case 3: - std::cout << "ERROR: UNSUPPORTED FS INST\n"; - std::cout << std::hex << "FSD (*not implemented*): about to write: " << fregs[rsrc1] << " to " << memAddr << "\n"; - exit(1); - // c.core->mem.write(memAddr, reg[rsrc1], c.supervisorMode, 8); - break; - case 4: - std::cout << "ERROR: UNSUPPORTED FS INST\n"; - std::cout << std::hex << "FSQ (*not implemented*): about to write: " << fregs[rsrc1] << " to " << memAddr << "\n"; - exit(1); - // c.core->mem.write(memAddr, reg[rsrc1], c.supervisorMode, 16); - break; - default: - std::cout << "ERROR: UNSUPPORTED FS INST\n"; - exit(1); - } + core_->dcache_write(memAddr, fregs[rsrc1], 0, 4); D(3, "STORE MEM ADDRESS: " << std::hex << memAddr); } else { for (int i = 0; i < vl_; i++) { - // cout << "iter" << std::endl; Word memAddr = iregs[rsrc0] + (i * vtype_.vsew / 8); - // std::cout << "STORE MEM ADDRESS *** : " << std::hex << memAddr << "\n"; - - trace_inst->is_sw = true; - trace_inst->mem_addresses[i] = memAddr; - switch (instr.getVlsWidth()) { - case 6: //store word and unit strided (not checking for unit stride) - { + case 6: { + //store word and unit strided (not checking for unit stride) uint32_t value = *(uint32_t *)(vRegFile_[instr.getVs3()].data() + i); core_->dcache_write(memAddr, value, 0, 4); D(4, "store: " << memAddr << " value:" << value); @@ -1748,96 +503,91 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { case 0x0c: //FDIV case 0x2c: //FSQRT { - if (fpBinIsNan(fregs[rsrc0]) || fpBinIsNan(fregs[rsrc1])) { 
// if one of op is NaN - D(3, "one or two rsrc is NaN!"); - // one of them is not quiet NaN, them set FCSR + if (fpBinIsNan(fregs[rsrc0]) || fpBinIsNan(fregs[rsrc1])) { + // if one of op is NaN, one of them is not quiet NaN, them set FCSR if ((fpBinIsNan(fregs[rsrc0])==2) | (fpBinIsNan(fregs[rsrc1])==2)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x10); // set NV bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x10); // set NV bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit } if (fpBinIsNan(fregs[rsrc0]) && fpBinIsNan(fregs[rsrc1])) - fregs[rdest] = 0x7fc00000; // canonical(quiet) NaN + fresult = 0x7fc00000; // canonical(quiet) NaN else if (fpBinIsNan(fregs[rsrc0])) - fregs[rdest] = fregs[rsrc1]; + fresult = fregs[rsrc1]; else - fregs[rdest] = fregs[rsrc0]; + fresult = fregs[rsrc0]; } else { float fpsrc_0 = intregToFloat(fregs[rsrc0]); float fpsrc_1 = intregToFloat(fregs[rsrc1]); - float fpOut; - + float fpDest; + feclearexcept(FE_ALL_EXCEPT); if (func7 == 0x00) // FADD - fpOut = fpsrc_0 + fpsrc_1; + fpDest = fpsrc_0 + fpsrc_1; else if (func7==0x04) // FSUB - fpOut = fpsrc_0 - fpsrc_1; + fpDest = fpsrc_0 - fpsrc_1; else if (func7==0x08) // FMUL - fpOut = fpsrc_0 * fpsrc_1; + fpDest = fpsrc_0 * fpsrc_1; else if (func7==0x0c) // FDIV - fpOut = fpsrc_0 / fpsrc_1; + fpDest = fpsrc_0 / fpsrc_1; else if (func7==0x2c) // FSQRT - fpOut = sqrt(fpsrc_0); + fpDest = sqrt(fpsrc_0); else { - printf("#[ERROR]: bad thing happened in fadd/fsub/fmul...\n"); - exit(1); - } - //show_fe_exceptions(); // once shown, it will clear corresponding bits, just for debug - + std::abort(); + } // fcsr defined in riscv if (fetestexcept(FE_INEXACT)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x1); // set NX bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x1); // set NX bit + core_->set_csr(CSR_FCSR, 
core_->get_csr(CSR_FCSR, t, id_) | 0x1, t, id_); // set NX bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x1, t, id_); // set NX bit } if (fetestexcept(FE_UNDERFLOW)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x2); // set UF bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x2); // set UF bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x2, t, id_); // set UF bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x2, t, id_); // set UF bit } if (fetestexcept(FE_OVERFLOW)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x4); // set OF bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x4); // set OF bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x4, t, id_); // set OF bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x4, t, id_); // set OF bit } if (fetestexcept(FE_DIVBYZERO)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x8); // set DZ bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x8); // set DZ bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x8, t, id_); // set DZ bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x8, t, id_); // set DZ bit } if (fetestexcept(FE_INVALID)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x10); // set NX bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x10); // set NX bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NX bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NX bit } - D(4, "fpOut: " << fpOut); - if (fpBinIsNan(floatToBin(fpOut)) == 0) { - fregs[rdest] = floatToBin(fpOut); + D(4, "fpDest: " << fpDest); + if (fpBinIsNan(floatToBin(fpDest)) == 0) { + fresult = floatToBin(fpDest); } else { // According to risc-v spec p.64 section 11.3 // If the result is NaN, it is the canonical NaN - fregs[rdest] = 0x7fc00000; + 
fresult = 0x7fc00000; } } } break; - // FSGNJ.S, FSGNJN.S FSGNJX.S + // FSGNJ.S, FSGNJN.S, FSGNJX.S case 0x10: { bool fsign1 = fregs[rsrc0] & 0x80000000; uint32_t fdata1 = fregs[rsrc0] & 0x7FFFFFFF; bool fsign2 = fregs[rsrc1] & 0x80000000; - switch (func3) { case 0: // FSGNJ.S - fregs[rdest] = (fsign2 << 31) | fdata1; + fresult = (fsign2 << 31) | fdata1; break; case 1: // FSGNJN.S fsign2 = !fsign2; - fregs[rdest] = (fsign2 << 31) | fdata1; + fresult = (fsign2 << 31) | fdata1; break; case 2: { // FSGNJX.S bool sign = fsign1 ^ fsign2; - fregs[rdest] = (sign << 31) | fdata1; + fresult = (sign << 31) | fdata1; } break; } } break; @@ -1847,27 +597,28 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { if (fpBinIsNan(fregs[rsrc0]) || fpBinIsNan(fregs[rsrc1])) { // if one of src is NaN // one of them is not quiet NaN, them set FCSR if ((fpBinIsNan(fregs[rsrc0])==2) | (fpBinIsNan(fregs[rsrc1])==2)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x10); // set NV bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x10); // set NV bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit } if (fpBinIsNan(fregs[rsrc0]) && fpBinIsNan(fregs[rsrc1])) - fregs[rdest] = 0x7fc00000; // canonical(quiet) NaN + fresult = 0x7fc00000; // canonical(quiet) NaN else if (fpBinIsNan(fregs[rsrc0])) - fregs[rdest] = fregs[rsrc1]; + fresult = fregs[rsrc1]; else - fregs[rdest] = fregs[rsrc0]; + fresult = fregs[rsrc0]; } else { uint8_t sr0IsZero = fpBinIsZero(fregs[rsrc0]); uint8_t sr1IsZero = fpBinIsZero(fregs[rsrc1]); - if (sr0IsZero && sr1IsZero && (sr0IsZero != sr1IsZero)) { // both are zero and not equal + if (sr0IsZero && sr1IsZero && (sr0IsZero != sr1IsZero)) { + // both are zero and not equal // handle corner case that compare +0 and -0 if (func3) { // FMAX.S - fregs[rdest] = (sr1IsZero==2) ? 
fregs[rsrc1] : fregs[rsrc0]; + fresult = (sr1IsZero==2) ? fregs[rsrc1] : fregs[rsrc0]; } else { // FMIM.S - fregs[rdest] = (sr1IsZero==2) ? fregs[rsrc0] : fregs[rsrc1]; + fresult = (sr1IsZero==2) ? fregs[rsrc0] : fregs[rsrc1]; } } else { float rs1 = intregToFloat(fregs[rsrc0]); @@ -1875,26 +626,24 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { if (func3) { // FMAX.S float fmax = std::max(rs1, rs2); - fregs[rdest] = floatToBin(fmax); + fresult = floatToBin(fmax); } else { // FMIN.S float fmin = std::min(rs1, rs2); - fregs[rdest] = floatToBin(fmin); + fresult = floatToBin(fmin); } } } } break; // FCVT.W.S FCVT.WU.S - case 0x60: { - // TODO: Need to clip result if rounded result is not representable in the destination format - // FCVT.W.S - // Convert floating point to 32-bit signed integer + case 0x60: { float fpSrc = intregToFloat(fregs[rsrc0]); - Word result = 0x00000000; - bool outOfRange = false; - // FCVT.W.S - if (rsrc1 == 0) { // Not sure if need to change to floating point reg + Word result; + bool outOfRange = false; + if (rsrc1 == 0) { + // FCVT.W.S + // Convert floating point to 32-bit signed integer if (fpSrc > pow(2.0, 31) - 1 || fpBinIsNan(fregs[rsrc0]) || fpBinIsInf(fregs[rsrc0]) == 2) { feclearexcept(FE_ALL_EXCEPT); outOfRange = true; @@ -1927,36 +676,33 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { result = (uint32_t) fpSrc; } } - - //show_fe_exceptions(); - // fcsr defined in riscv if (fetestexcept(FE_INEXACT)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x1); // set NX bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x1); // set NX bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x1, t, id_); // set NX bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x1, t, id_); // set NX bit } if (fetestexcept(FE_UNDERFLOW)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x2); // set UF bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x2); 
// set UF bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x2, t, id_); // set UF bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x2, t, id_); // set UF bit } if (fetestexcept(FE_OVERFLOW)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x4); // set OF bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x4); // set OF bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x4, t, id_); // set OF bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x4, t, id_); // set OF bit } if (fetestexcept(FE_DIVBYZERO)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x8); // set DZ bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x8); // set DZ bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x8, t, id_); // set DZ bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x8, t, id_); // set DZ bit } if (fetestexcept(FE_INVALID) || outOfRange) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x10); // set NV bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x10); // set NV bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit } - if (rdest) iregs[rdest] = result; + iresult = result; } break; // FMV.X.W FCLASS.S @@ -1965,33 +711,33 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { if (func3) { // Examine the value in fpReg rs1 and write to integer rd // a 10-bit mask to indicate the class of the fp number - if (rdest) iregs[rdest] = 0; // clear all bits + iresult = 0; // clear all bits bool fsign = fregs[rsrc0] & 0x80000000; uint32_t expo = (fregs[rsrc0]>>23) & 0x000000FF; uint32_t fraction = fregs[rsrc0] & 0x007FFFFF; if ((expo==0) && (fraction==0)) { - if (rdest) iregs[rdest] = fsign? (1<<3) : (1<<4); // +/- 0 + iresult = fsign ? 
(1<<3) : (1<<4); // +/- 0 } else if ((expo==0) && (fraction!=0)) { - if (rdest) iregs[rdest] = fsign? (1<<2) : (1<<5); // +/- subnormal + iresult = fsign ? (1<<2) : (1<<5); // +/- subnormal } else if ((expo==0xFF) && (fraction==0)) { - if (rdest) iregs[rdest] = fsign? (1<<0) : (1<<7); // +/- infinity + iresult = fsign ? (1<<0) : (1<<7); // +/- infinity } else if ((expo==0xFF) && (fraction!=0)) { if (!fsign && (fraction == 0x00400000)) { - if (rdest) iregs[rdest] = (1<<9); // quiet NaN + iresult = (1<<9); // quiet NaN } else { - if (rdest) iregs[rdest] = (1<<8); // signaling NaN + iresult = (1<<8); // signaling NaN } } else { - if (rdest) iregs[rdest] = fsign? (1<<1) : (1<<6); // +/- normal + iresult = fsign ? (1<<1) : (1<<6); // +/- normal } } else { // FMV.X.W // Move bit values from floating-point register rs1 to integer register rd // Since we are using integer register to represent floating point register, // just simply assign here. - if (rdest) iregs[rdest] = fregs[rsrc0]; + iresult = fregs[rsrc0]; } } break; @@ -2004,74 +750,58 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { // FLE.S or FLT.S if (func3 == 0 || func3 == 1) { // If either input is NaN, set NV bit - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x10); // set NV bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x10); // set NV bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit } else { // FEQ.S // Only set NV bit if it is signaling NaN if (fpBinIsNan(fregs[rsrc0]) == 2 || fpBinIsNan(fregs[rsrc1]) == 2) { // If either input is NaN, set NV bit - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x10); // set NV bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x10); // set NV bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, 
core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit } } // The result is 0 if either operand is NaN - if (rdest) iregs[rdest] = 0; + iresult = 0; } else { switch(func3) { - case 0: { - // FLE.S - if (intregToFloat(fregs[rsrc0]) <= intregToFloat(fregs[rsrc1])) { - if (rdest) iregs[rdest] = 1; - } else { - if (rdest) iregs[rdest] = 0; - } - } break; - - case 1: { - // FLT.S - if (intregToFloat(fregs[rsrc0]) < intregToFloat(fregs[rsrc1])) { - if (rdest) iregs[rdest] = 1; - } else { - if (rdest) iregs[rdest] = 0; - } - } - break; - - case 2: { - // FEQ.S - if (intregToFloat(fregs[rsrc0]) == intregToFloat(fregs[rsrc1])) { - if (rdest) iregs[rdest] = 1; - } else { - if (rdest) iregs[rdest] = 0; - } - } - break; + case 0: { + // FLE.S + iresult = (intregToFloat(fregs[rsrc0]) <= intregToFloat(fregs[rsrc1])); + } break; + case 1: { + // FLT.S + iresult = (intregToFloat(fregs[rsrc0]) < intregToFloat(fregs[rsrc1])); + } break; + case 2: { + // FEQ.S + iresult = (intregToFloat(fregs[rsrc0]) == intregToFloat(fregs[rsrc1])); + } break; + default: + std::abort(); } } } break; - case 0x68: { - // Cast integer to floating point - float data = iregs[rsrc0]; + case 0x68: + // Cast integer to floating point if (rsrc1) { - // FCVT.S.WU - // Convert 32-bit unsigned integer to floating point - fregs[rdest] = floatToBin(data); + // FCVT.S.WU: convert 32-bit unsigned integer to floating point + float data = iregs[rsrc0]; + fresult = floatToBin(data); } else { - // FCVT.S.W - // Convert 32-bit signed integer to floating point + // FCVT.S.W: convert 32-bit signed integer to floating point // iregs[rsrc0] is actually a unsigned number - data = (int)iregs[rsrc0]; - fregs[rdest] = floatToBin(data); + float data = (WordI)iregs[rsrc0]; + fresult = floatToBin(data); } - } break; + break; case 0x78: { // FMV.W.X // Move bit values from integer register rs1 to floating register rd // Since we are using integer register to represent floating point register, // just simply assign here. 
- fregs[rdest] = iregs[rsrc0]; + fresult = iregs[rsrc0]; } break; } @@ -2082,91 +812,1053 @@ void Warp::execute(Instr &instr, trace_inst_t *trace_inst) { case FMNMADD: case FMNMSUB: { // multiplicands are infinity and zero, them set FCSR - if (fpBinIsZero(fregs[rsrc0])|| fpBinIsZero(fregs[rsrc1])|| fpBinIsInf(fregs[rsrc0]) || fpBinIsInf(fregs[rsrc1])) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x10); // set NV bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x10); // set NV bit + if (fpBinIsZero(fregs[rsrc0]) || fpBinIsZero(fregs[rsrc1]) || fpBinIsInf(fregs[rsrc0]) || fpBinIsInf(fregs[rsrc1])) { + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit } - if (fpBinIsNan(fregs[rsrc0]) || fpBinIsNan(fregs[rsrc1]) || fpBinIsNan(fregs[rsrc2])) { - // if one of op is NaN - // if addend is not quiet NaN, them set FCSR + // if one of op is NaN, if addend is not quiet NaN, them set FCSR if ((fpBinIsNan(fregs[rsrc0])==2) | (fpBinIsNan(fregs[rsrc1])==2) | (fpBinIsNan(fregs[rsrc1])==2)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x10); // set NV bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x10); // set NV bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit } - fregs[rdest] = 0x7fc00000; // canonical(quiet) NaN + fresult = 0x7fc00000; // canonical(quiet) NaN } else { float rs1 = intregToFloat(fregs[rsrc0]); float rs2 = intregToFloat(fregs[rsrc1]); float rs3 = intregToFloat(fregs[rsrc2]); - float fpOut(0.0); + float fpDest(0.0); feclearexcept(FE_ALL_EXCEPT); switch (opcode) { case FMADD: // rd = (rs1*rs2)+rs3 - fpOut = (rs1 * rs2) + rs3; break; + fpDest = (rs1 * rs2) + rs3; break; case FMSUB: // rd = (rs1*rs2)-rs3 - fpOut = (rs1 * rs2) - rs3; break; + fpDest 
= (rs1 * rs2) - rs3; break; case FMNMADD: // rd = -(rs1*rs2)+rs3 - fpOut = -1*(rs1 * rs2) - rs3; break; + fpDest = -1*(rs1 * rs2) - rs3; break; case FMNMSUB: // rd = -(rs1*rs2)-rs3 - fpOut = -1*(rs1 * rs2) + rs3; break; + fpDest = -1*(rs1 * rs2) + rs3; break; default: - printf("#[ERROR] FMADD/FMSUB... wrong\n"); std::abort(); break; } // fcsr defined in riscv if (fetestexcept(FE_INEXACT)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x1); // set NX bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x1); // set NX bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x1, t, id_); // set NX bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x1, t, id_); // set NX bit } if (fetestexcept(FE_UNDERFLOW)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x2); // set UF bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x2); // set UF bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x2, t, id_); // set UF bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x2, t, id_); // set UF bit } if (fetestexcept(FE_OVERFLOW)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x4); // set OF bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x4); // set OF bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x4, t, id_); // set OF bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x4, t, id_); // set OF bit } if (fetestexcept(FE_DIVBYZERO)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x8); // set DZ bit - core_->set_csr(0x001, core_->get_csr(0x001, t, id_) | 0x8); // set DZ bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x8, t, id_); // set DZ bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x8, t, id_); // set DZ bit } if (fetestexcept(FE_INVALID)) { - core_->set_csr(0x003, core_->get_csr(0x003, t, id_) | 0x10); // set NV bit - core_->set_csr(0x001, 
core_->get_csr(0x001, t, id_) | 0x10); // set NV bit + core_->set_csr(CSR_FCSR, core_->get_csr(CSR_FCSR, t, id_) | 0x10, t, id_); // set NV bit + core_->set_csr(CSR_FFLAGS, core_->get_csr(CSR_FFLAGS, t, id_) | 0x10, t, id_); // set NV bit } - fregs[rdest] = floatToBin(fpOut); + fresult = floatToBin(fpDest); } } break; + case GPGPU: + switch (func3) { + case 0: { + // TMC + int active_threads = std::min(iregs[rsrc0], num_threads); + tmask_.reset(); + for (int i = 0; i < active_threads; ++i) { + tmask_[i] = true; + } + active_ = tmask_.any(); + pipeline->stall_warp = true; + runOnce = true; + } break; + case 1: { + // WSPAWN + int active_warps = std::min(iregs[rsrc0], core_->arch().num_warps()); + D(0, "Spawning " << (active_warps-1) << " warps at PC: " << std::hex << iregs[rsrc1]); + for (int i = 1; i < active_warps; ++i) { + Warp &newWarp = core_->warp(i); + newWarp.setPC(iregs[rsrc1]); + newWarp.setTmask(0, true); + } + pipeline->stall_warp = true; + runOnce = true; + } break; + case 2: { + // SPLIT + if (checkUnanimous(rsrc0, iRegFile_, tmask_)) { + D(3, "Unanimous pred: " << rsrc0 << " val: " << iregs[rsrc0] << "\n"); + DomStackEntry e(tmask_); + e.unanimous = true; + domStack_.push(e); + } else { + D(3, "Split: Original TM: "); + DX( for (int i = 0; i < num_threads; ++i) D(3, tmask_[i] << " "); ) + + ThreadMask tmask; + for (int i = 0; i < num_threads; ++i) { + tmask[i] = tmask_[i] && !iRegFile_[i][rsrc0]; + } + + DomStackEntry e(tmask, nextPC); + domStack_.push(tmask_); + domStack_.push(e); + for (size_t i = 0; i < e.tmask.size(); ++i) { + tmask_[i] = !e.tmask[i] && tmask_[i]; + } + active_ = tmask_.any(); + + D(3, "Split: New TM"); + DX( for (int i = 0; i < num_threads; ++i) D(3, tmask_[i] << " "); ) + + D(3, "Split: Pushed TM PC: " << std::hex << e.PC << std::dec << "\n"); + DX( for (int i = 0; i < num_threads; ++i) D(3, e.tmask[i] << " "); ) + } + pipeline->stall_warp = true; + runOnce = true; + } break; + case 3: { + // JOIN + D(3, "JOIN"); + if 
(!domStack_.empty() && domStack_.top().unanimous) { + D(2, "Uninimous branch at join"); + tmask_ = domStack_.top().tmask; + active_ = tmask_.any(); + domStack_.pop(); + } else { + if (!domStack_.top().fallThrough) { + nextPC = domStack_.top().PC; + D(3, "join: NOT FALLTHROUGH PC: " << std::hex << nextPC << std::dec); + } + + D(3, "Join: Old TM: "); + DX( for (int i = 0; i < num_threads; ++i) D(3, tmask_[i] << " "); ) + std::cout << "\n"; + tmask_ = domStack_.top().tmask; + active_ = tmask_.any(); + + D(3, "Join: New TM: "); + DX( for (int i = 0; i < num_threads; ++i) D(3, tmask_[i] << " "); ) + + domStack_.pop(); + } + pipeline->stall_warp = true; + runOnce = true; + } break; + case 4: { + // BAR + active_ = false; + core_->barrier(iregs[rsrc0], iregs[rsrc1], id_); + pipeline->stall_warp = true; + runOnce = true; + } break; + default: + std::abort(); + } + break; + case VSET: { + D(3, "VSET"); + int VLEN = core_->arch().vsize() * 8; + int VLMAX = (instr.getVlmul() * VLEN) / instr.getVsew(); + switch (func3) { + case 0: // vector-vector + switch (func6) { + case 0: { + auto& vr1 = vRegFile_[rsrc0]; + auto& vr2 = vRegFile_[rsrc1]; + auto& vd = vRegFile_[rdest]; + auto& mask = vRegFile_[0]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t emask = *(uint8_t *)(mask.data() + i); + uint8_t value = emask & 0x1; + if (vmask || (!vmask && value)) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = first + second; + D(4, "Adding " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t emask = *(uint16_t *)(mask.data() + i); + uint16_t value = emask & 0x1; + if (vmask || (!vmask && value)) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = first + second; + D(4, "Adding " << first << " + " << second 
<< " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t emask = *(uint32_t *)(mask.data() + i); + uint32_t value = emask & 0x1; + if (vmask || (!vmask && value)) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = first + second; + D(4, "Adding " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } + } break; + case 24: { + //vmseq + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first == second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first == second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first == second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 25: { + //vmsne + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first != second) ? 
1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first != second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first != second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 26: { + //vmsltu + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first < second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first < second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first < second) ? 
1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 27: { + //vmslt + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + int8_t first = *(int8_t *)(vr1.data() + i); + int8_t second = *(int8_t *)(vr2.data() + i); + int8_t result = (first < second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + int16_t first = *(int16_t *)(vr1.data() + i); + int16_t second = *(int16_t *)(vr2.data() + i); + int16_t result = (first < second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(int16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + int32_t first = *(int32_t *)(vr1.data() + i); + int32_t second = *(int32_t *)(vr2.data() + i); + int32_t result = (first < second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(int32_t *)(vd.data() + i) = result; + } + } + } break; + case 28: { + //vmsleu + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first <= second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first <= second) ? 
1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first <= second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 29: { + //vmsle + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + int8_t first = *(int8_t *)(vr1.data() + i); + int8_t second = *(int8_t *)(vr2.data() + i); + int8_t result = (first <= second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + int16_t first = *(int16_t *)(vr1.data() + i); + int16_t second = *(int16_t *)(vr2.data() + i); + int16_t result = (first <= second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(int16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + int32_t first = *(int32_t *)(vr1.data() + i); + int32_t second = *(int32_t *)(vr2.data() + i); + int32_t result = (first <= second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(int32_t *)(vd.data() + i) = result; + } + } + } break; + case 30: { + //vmsgtu + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first > second) ? 
1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first > second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first > second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 31: { + //vmsgt + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + int8_t first = *(int8_t *)(vr1.data() + i); + int8_t second = *(int8_t *)(vr2.data() + i); + int8_t result = (first > second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + int16_t first = *(int16_t *)(vr1.data() + i); + int16_t second = *(int16_t *)(vr2.data() + i); + int16_t result = (first > second) ? 1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(int16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + int32_t first = *(int32_t *)(vr1.data() + i); + int32_t second = *(int32_t *)(vr2.data() + i); + int32_t result = (first > second) ? 
1 : 0; + D(4, "Comparing " << first << " + " << second << " = " << result); + *(int32_t *)(vd.data() + i) = result; + } + } + } break; + } + break; + case 2: { + switch (func6) { + case 24: { + // vmandnot + D(3, "vmandnot"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value & !second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value & !second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = (first_value & !second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 25: { + // vmand + D(3, "vmand"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + 
uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value & second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value & second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = (first_value & second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 26: { + // vmor + D(3, "vmor"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value | second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t 
*)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value | second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = (first_value | second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 27: { + //vmxor + D(3, "vmxor"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value ^ second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value ^ second_value); + D(4, "Comparing " << first << " + " << second << " = " << 
result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = (first_value ^ second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 28: { + //vmornot + D(3, "vmornot"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value | !second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value | !second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 
0x1); + uint32_t result = (first_value | !second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 29: { + //vmnand + D(3, "vmnand"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = !(first_value & second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = !(first_value & second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = !(first_value & second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 30: { + //vmnor + D(3, "vmnor"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = 
vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = !(first_value | second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = !(first_value | second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = !(first_value | second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 31: { + //vmxnor + D(3, "vmxnor"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = !(first_value ^ second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + 
*(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = !(first_value ^ second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = !(first_value ^ second_value); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 37: { + //vmul + D(3, "vmul"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first * second); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first * second); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + 
*(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first * second); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 45: { + // vmacc + D(3, "vmacc"); + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first * second); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) += result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first * second); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) += result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first * second); + D(4, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) += result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + } + } break; + case 6: { + switch (func6) { + case 0: { + D(3, "vmadd.vx"); + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t 
second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (iregs[rsrc0] + second); + D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (iregs[rsrc0] + second); + D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (iregs[rsrc0] + second); + D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 37: { + // vmul.vx + D(3, "vmul.vx"); + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (iregs[rsrc0] * second); + D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (iregs[rsrc0] * second); + D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (iregs[rsrc0] * 
second); + D(4, "Comparing " << iregs[rsrc0] << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + } + } break; + case 7: { + vtype_.vill = 0; + vtype_.vediv = instr.getVediv(); + vtype_.vsew = instr.getVsew(); + vtype_.vlmul = instr.getVlmul(); + + D(3, "lmul:" << vtype_.vlmul << " sew:" << vtype_.vsew << " ediv: " << vtype_.vediv << "rsrc_" << iregs[rsrc0] << "VLMAX" << VLMAX); + + int s0 = iregs[rsrc0]; + if (s0 <= VLMAX) { + vl_ = s0; + } else if (s0 < (2 * VLMAX)) { + vl_ = (int)ceil((s0 * 1.0) / 2.0); + } else if (s0 >= (2 * VLMAX)) { + vl_ = VLMAX; + } + iresult = vl_; + } break; + default: + std::abort(); + } + } break; default: - D(3, "PC: " << std::hex << (PC_ - 4)); - D(3, "ERROR: Unsupported instruction: " << instr); std::abort(); } - if (instr.hasRDest()) { - if (instr.is_FpDest()) { - D(3, "r" << std::dec << rdest << "=0x" << std::hex << std::hex << fregs[rdest]); - } else { - D(3, "r" << std::dec << rdest << "=0x" << std::hex << std::hex << iregs[rdest]); + int rdt = instr.getRDType(); + switch (rdt) { + case 1: + if (rdest) { + D(3, "[" << std::dec << t << "] Dest Register: r" << rdest << "=0x" << std::hex << std::hex << iresult); + iregs[rdest] = iresult; } + break; + case 2: + D(3, "[" << std::dec << t << "] Dest Register: fr" << rdest << "=0x" << std::hex << std::hex << fresult); + fregs[rdest] = fresult; + break; + default: + break; } } - if (updatePC) { - PC_ = nextPC; + PC_ += core_->arch().wsize(); + if (PC_ != nextPC) { D(3, "Next PC: " << std::hex << nextPC << std::dec); + PC_ = nextPC; } } diff --git a/simX/instr.h b/simX/instr.h index 32dde81f..a93dd61b 100644 --- a/simX/instr.h +++ b/simX/instr.h @@ -1,7 +1,6 @@ #pragma once #include "types.h" -#include "trace.h" namespace vortex { @@ -20,12 +19,7 @@ enum Opcode { JALR_INST = 0x67, SYS_INST = 0x73, FENCE = 0x0f, - PJ_INST = 0x7b, - GPGPU = 0x6b, - VSET_ARITH= 
0x57, - VL = 0x7, - VS = 0x27, - // F-Extension + // F Extension FL = 0x7, FS = 0x27, FCI = 0x53, @@ -33,6 +27,12 @@ enum Opcode { FMSUB = 0x47, FMNMSUB = 0x4b, FMNMADD = 0x4f, + // Vector Extension + VSET = 0x57, + VL = 0x7, + VS = 0x27, + // GPGPU Extension + GPGPU = 0x6b, }; enum InstType { @@ -51,34 +51,28 @@ class Instr { public: Instr() : opcode_(Opcode::NOP) - , nRsrc_(0) - , hasImmSrc_(false) - , hasRDest_(false) - , is_iDest_(false) - , is_FpDest_(false) - , is_VDest_(false) - , is_FpSrc_(0) - , is_VSrc_(0) - , func2_(0) + , num_rsrcs_(0) + , has_imm_(false) + , rdest_type_(0) /* getRDType() is read for every instruction; 0 matches none of the types (1, 2, 3) assigned by setDestReg, setDestFReg and setDestVReg */ + , rdest_(0) , func3_(0) - , func7_(0) - {} - - - friend std::ostream &operator<<(std::ostream &, Instr &); + , func7_(0) { + for (int i = 0; i < MAX_REG_SOURCES; ++i) { + rsrc_type_[i] = 0; + } + } /* Setters used to "craft" the instruction. */ void setOpcode(Opcode opcode) { opcode_ = opcode; } - void setDestReg(int destReg) { hasRDest_ = true; is_iDest_ = true; rdest_ = destReg; } - void setSrcReg(int srcReg) { rsrc_[nRsrc_++] = srcReg; } - void setDestFReg(int destReg) { hasRDest_ = true; is_FpDest_ = true; rdest_ = destReg; } - void setSrcFReg(int srcReg) { is_FpSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; } - void setDestVReg(int destReg) { hasRDest_ = true; is_VDest_ = true; rdest_ = destReg; } - void setSrcVReg(int srcReg) { is_VSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; } + void setDestReg(int destReg) { rdest_type_ = 1; rdest_ = destReg; } + void setSrcReg(int srcReg) { rsrc_type_[num_rsrcs_] = 1; rsrc_[num_rsrcs_++] = srcReg; } + void setDestFReg(int destReg) { rdest_type_ = 2; rdest_ = destReg; } + void setSrcFReg(int srcReg) { rsrc_type_[num_rsrcs_] = 2; rsrc_[num_rsrcs_++] = srcReg; } + void setDestVReg(int destReg) { rdest_type_ = 3; rdest_ = destReg; } + void setSrcVReg(int srcReg) { rsrc_type_[num_rsrcs_] = 3; rsrc_[num_rsrcs_++] = srcReg; } void setFunc3(Word func3) { func3_ = func3; } void setFunc7(Word func7) { func7_ = func7; } - void setSrcImm(Word srcImm) { hasImmSrc_ 
= true; immsrc_ = srcImm; } - void setVsetImm(Word vset_imm) { if (vset_imm) vsetImm_ = true; else vsetImm_ = false; } + void setImm(Word imm) { has_imm_ = true; imm_ = imm; } void setVlsWidth(Word width) { vlsWidth_ = width; } void setVmop(Word mop) { vMop_ = mop; } void setVnf(Word nf) { vNf_ = nf; } @@ -94,29 +87,22 @@ public: Word getFunc3() const { return func3_; } Word getFunc6() const { return func6_; } Word getFunc7() const { return func7_; } - int getNRSrc() const { return nRsrc_; } + int getNRSrc() const { return num_rsrcs_; } int getRSrc(int i) const { return rsrc_[i]; } - bool hasRDest() const { return hasRDest_; } - int getRDest() const { return rdest_; } - bool hasImm() const { return hasImmSrc_; } - Word getImm() const { return immsrc_; } - bool getVsetImm() const { return vsetImm_; } + int getRSType(int i) const { return rsrc_type_[i]; } + int getRDest() const { return rdest_; } + int getRDType() const { return rdest_type_; } + bool hasImm() const { return has_imm_; } + Word getImm() const { return imm_; } Word getVlsWidth() const { return vlsWidth_; } Word getVmop() const { return vMop_; } Word getvNf() const { return vNf_; } - bool getVmask() const { return vmask_; } + Word getVmask() const { return vmask_; } Word getVs3() const { return vs3_; } Word getVlmul() const { return vlmul_; } Word getVsew() const { return vsew_; } Word getVediv() const { return vediv_; } - bool is_iDest() const { return is_iDest_; } - bool is_FpDest() const { return is_FpDest_; } - bool is_FpSrc(int i) const { return (is_FpSrc_ >> i) & 0x1; } - - bool is_VDest() const { return is_VDest_; } - bool is_VSrc(int i) const { return (is_VSrc_ >> i) & 0x1; } - private: enum { @@ -124,24 +110,21 @@ private: }; Opcode opcode_; - int nRsrc_; - bool hasImmSrc_; - bool hasRDest_; - bool is_iDest_; - bool is_FpDest_; - bool is_VDest_; - int is_FpSrc_; - int is_VSrc_; - Word immsrc_; - Word func2_; + int num_rsrcs_; + bool has_imm_; + int rdest_type_; + int isrc_mask_; + int 
fsrc_mask_; + int vsrc_mask_; + Word imm_; + int rsrc_type_[MAX_REG_SOURCES]; + int rsrc_[MAX_REG_SOURCES]; + int rdest_; Word func3_; Word func7_; - int rsrc_[MAX_REG_SOURCES]; - int rdest_; //Vector - bool vsetImm_; - bool vmask_; + Word vmask_; Word vlsWidth_; Word vMop_; Word vNf_; @@ -150,8 +133,8 @@ private: Word vsew_; Word vediv_; Word func6_; + + friend std::ostream &operator<<(std::ostream &, const Instr&); }; -std::ostream &operator<<(std::ostream &, Instr &); - } \ No newline at end of file diff --git a/simX/main.cpp b/simX/main.cpp index e98e12a8..8f11b682 100644 --- a/simX/main.cpp +++ b/simX/main.cpp @@ -22,6 +22,7 @@ int main(int argc, char **argv) { std::string imgFileName; bool showHelp(false); bool showStats(false); + bool riscv_test(false); /* Read the command line arguments. */ CommandLineArgFlag fh("-h", "--help", "", showHelp); @@ -30,6 +31,7 @@ int main(int argc, char **argv) { CommandLineArgSetter fc("-c", "--cores", "", num_cores); CommandLineArgSetter fw("-w", "--warps", "", num_warps); CommandLineArgSetter ft("-t", "--threads", "", num_threads); + CommandLineArgFlag fr("-r", "--riscv", "", riscv_test); CommandLineArgFlag fs("-s", "--stats", "", showStats); CommandLineArg::readArgs(argc - 1, argv + 1); @@ -41,6 +43,7 @@ int main(int argc, char **argv) { " -w, --warps Number of warps\n" " -t, --threads Number of threads\n" " -a, --arch Architecture string\n" + " -r, --riscv riscv test\n" " -s, --stats Print stats on exit.\n"; return 0; } @@ -64,16 +67,24 @@ int main(int argc, char **argv) { } bool running; - do { running = false; - for (int i = 0; i < num_cores; ++i) { - if (!cores[i]->running()) - continue; - running = true; - cores[i]->step(); + for (auto& core : cores) { + core->step(); + if (core->running()) + running = true; } } while (running); + if (riscv_test) { + bool status = (1 == cores[0]->getIRegValue(3)); + if (status) { + std::cout << "Passed." << std::endl; + } else { + std::cout << "Failed." 
<< std::endl; + return -1; + } + } + return 0; } diff --git a/simX/mem.cpp b/simX/mem.cpp index bea4c70d..ad83db88 100644 --- a/simX/mem.cpp +++ b/simX/mem.cpp @@ -90,11 +90,14 @@ void MemoryUnit::ADecoder::write(Addr a, Word w, bool /*sup*/, Size wordSize) { throw BadAddress(); } RAM *ram = (RAM *)ma.md; - if (wordSize == 8) { + switch (wordSize) { + case 1: ram->writeByte(ma.addr, &w); - } else if (wordSize == 16) { + break; + case 2: ram->writeHalf(ma.addr, &w); - } else { + break; + default: ram->writeWord(ma.addr, &w); } } @@ -247,7 +250,6 @@ void DiskControllerMemDevice::write(Addr a, Word w) { RAM::RAM(uint32_t num_pages, uint32_t page_size) : page_bits_(log2ceil(page_size)) { - assert(page_size >= 4); assert(ispow2(page_size)); mem_.resize(num_pages, NULL); uint64_t sizel = uint64_t(mem_.size()) << page_bits_; @@ -272,15 +274,16 @@ Size RAM::size() const { } uint8_t *RAM::get(uint32_t address) { - uint32_t page_size = 14 << page_bits_; + uint32_t page_size = 1 << page_bits_; uint32_t page_index = address >> page_bits_; uint32_t byte_offset = address & ((1 << page_bits_) - 1); uint8_t* &page = mem_.at(page_index); if (page == NULL) { uint8_t *ptr = new uint8_t[page_size]; - for (uint32_t i = 0; i < (page_size / 4); ++i) { - ((uint32_t*)ptr)[i] = 0xddccbbaa; + // set uninitialized data to "baadf00d" + for (uint32_t i = 0; i < page_size; ++i) { + ptr[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff; } page = ptr; } diff --git a/simX/pipeline.cpp b/simX/pipeline.cpp new file mode 100644 index 00000000..c54977a0 --- /dev/null +++ b/simX/pipeline.cpp @@ -0,0 +1,65 @@ +#include <iostream> +#include "pipeline.h" + +using namespace vortex; + +namespace vortex { +std::ostream &operator<<(std::ostream &os, const Pipeline& pipeline) { + os << pipeline.name_ << ": valid=" << pipeline.valid << std::endl; + os << pipeline.name_ << ": stalled=" << pipeline.stalled << std::endl; + os << pipeline.name_ << ": stall_warp=" << pipeline.stall_warp << std::endl;
+ os << pipeline.name_ << ": wid=" << pipeline.wid << std::endl; + os << pipeline.name_ << ": PC=" << std::hex << pipeline.PC << std::dec << std::endl; + os << pipeline.name_ << ": used_iregs=" << pipeline.used_iregs << std::endl; + os << pipeline.name_ << ": used_fregs=" << pipeline.used_fregs << std::endl; + os << pipeline.name_ << ": used_vregs=" << pipeline.used_vregs << std::endl; + return os; +} +} + +Pipeline::Pipeline(const char* name) +: name_(name) { + this->clear(); +} + +void Pipeline::clear() { /* give every public stage field a defined value; name_ is left untouched */ + valid = false; + stalled = false; + stall_warp = false; + wid = 0; + PC = 0; + rdest_type = 0; /* next() copies rdest_type and rdest to the drain stage, so they must not stay indeterminate */ + rdest = 0; + used_iregs.reset(); + used_fregs.reset(); + used_vregs.reset(); +} + +bool Pipeline::enter(Pipeline *drain) { /* true only when drain (if given) is not stalled and this stage is valid */ + if (drain) { + if (drain->stalled) { + this->stalled = true; + return false; + } + drain->valid = false; + } + this->stalled = false; + if (!this->valid) + return false; + return true; +} + +void Pipeline::next(Pipeline *drain) { /* copies this stage's fields into drain; does nothing when drain is null */ + if (drain) { + drain->valid = this->valid; + drain->stalled = this->stalled; + drain->stall_warp = this->stall_warp; + drain->wid = this->wid; + drain->PC = this->PC; + drain->rdest = this->rdest; + drain->rdest_type = this->rdest_type; + drain->used_iregs = this->used_iregs; + drain->used_fregs = this->used_fregs; + drain->used_vregs = this->used_vregs; + } +} \ No newline at end of file diff --git a/simX/pipeline.h b/simX/pipeline.h new file mode 100644 index 00000000..42cb2af1 --- /dev/null +++ b/simX/pipeline.h @@ -0,0 +1,47 @@ + +#pragma once + +#include +#include "debug.h" +#include "util.h" + +namespace vortex { + +class Instr; + +class Pipeline { +public: + Pipeline(const char* name); + + void clear(); + + bool enter(Pipeline* drain); + + void next(Pipeline* drain); + + //-- + bool valid; + + //-- + bool stalled; + bool stall_warp; + + //-- + int wid; + Word PC; + + //-- + int rdest_type; + int rdest; + RegMask used_iregs; + RegMask used_fregs; + RegMask used_vregs; + +private: + + const char* name_; + + friend std::ostream &operator<<(std::ostream &, const Pipeline&); +}; + +}
\ No newline at end of file diff --git a/simX/test_runtime.sh b/simX/test_runtime.sh index 16fe2f28..93506147 100755 --- a/simX/test_runtime.sh +++ b/simX/test_runtime.sh @@ -1,16 +1,14 @@ #!/bin/bash +set -e + make make -C ../runtime/tests/dev make -C ../runtime/tests/hello make -C ../runtime/tests/nlTest make -C ../runtime/tests/simple -echo start > results.txt - -printf "Fasten your seatbelts ladies and gentelmen!!\n\n\n\n" - -#./simX -a rv32i -i ../runtime/tests/dev/vx_dev_main.hex -s 1> emulator.debug -#./simX -a rv32i -i ../runtime/tests/hello/hello.hex -s 1> emulator.debug -./simX -a rv32i -i ../runtime/tests/nlTest/vx_nl_main.hex -s 1> emulator.debug -./simX -a rv32i -i ../runtime/tests/simple/vx_simple_main.hex -s 1> emulator.debug +./simX -a rv32i -i ../runtime/tests/dev/vx_dev_main.hex +./simX -a rv32i -i ../runtime/tests/hello/hello.hex +./simX -a rv32i -i ../runtime/tests/nlTest/vx_nl_main.hex +./simX -a rv32i -i ../runtime/tests/simple/vx_simple_main.hex diff --git a/simX/test_rv32f.sh b/simX/test_rv32f.sh new file mode 100755 index 00000000..2822b672 --- /dev/null +++ b/simX/test_rv32f.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +set -e + +make + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-fadd.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-fadd.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-fmadd.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-fmadd.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-fmin.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-fmin.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-fcmp.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-fcmp.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-fdst.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-ldst.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-fcvt.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-fcvt.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-fcvt_w.hex +./simX 
-a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-fcvt_w.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-move.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-move.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-recording.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-recoding.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-fdiv.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-fdiv.hex + +echo ../benchmarks/riscv_tests/isa/rv32uf-p-fclass.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32uf-p-fclass.hex \ No newline at end of file diff --git a/simX/test_rv32i.sh b/simX/test_rv32i.sh new file mode 100755 index 00000000..5c67988a --- /dev/null +++ b/simX/test_rv32i.sh @@ -0,0 +1,143 @@ +#!/bin/bash + +set -e + +make + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-add.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-add.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-addi.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-addi.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-and.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-and.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-andi.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-andi.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-auipc.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-auipc.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-beq.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-beq.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-bge.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-bge.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-bgeu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-bgeu.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-blt.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-blt.hex + +echo 
./../benchmarks/riscv_tests/isa/rv32ui-p-bltu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-bltu.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-bne.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-bne.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-jal.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-jal.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-jalr.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-jalr.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-lb.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-lb.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-lbu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-lbu.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-lh.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-lh.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-lhu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-lhu.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-lui.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-lui.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-lw.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-lw.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-or.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-or.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-ori.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-ori.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-sb.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-sb.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-sh.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-sh.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-simple.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-simple.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-sll.hex +./simX -a rv32i -r -i 
../benchmarks/riscv_tests/isa/rv32ui-p-sll.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-slli.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-slli.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-slt.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-slt.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-slti.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-slti.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-sltiu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-sltiu.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-sltu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-sltu.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-sra.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-sra.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-srai.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-srai.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-srl.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-srl.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-srli.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-srli.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-sub.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-sub.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-sw.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-sw.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-xor.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-xor.hex + +echo ./../benchmarks/riscv_tests/isa/rv32ui-p-xori.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32ui-p-xori.hex + +echo ./../benchmarks/riscv_tests/isa/rv32um-p-div.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32um-p-div.hex + +echo ./../benchmarks/riscv_tests/isa/rv32um-p-divu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32um-p-divu.hex + +echo 
./../benchmarks/riscv_tests/isa/rv32um-p-mul.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32um-p-mul.hex + +echo ./../benchmarks/riscv_tests/isa/rv32um-p-mulh.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32um-p-mulh.hex + +echo ./../benchmarks/riscv_tests/isa/rv32um-p-mulhsu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32um-p-mulhsu.hex + +echo ./../benchmarks/riscv_tests/isa/rv32um-p-mulhu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32um-p-mulhu.hex + +echo ./../benchmarks/riscv_tests/isa/rv32um-p-rem.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32um-p-rem.hex + +echo ./../benchmarks/riscv_tests/isa/rv32um-p-remu.hex +./simX -a rv32i -r -i ../benchmarks/riscv_tests/isa/rv32um-p-remu.hex \ No newline at end of file diff --git a/simX/types.h b/simX/types.h index d10ae2a5..256118b2 100644 --- a/simX/types.h +++ b/simX/types.h @@ -8,10 +8,13 @@ namespace vortex { typedef uint8_t Byte; typedef uint32_t Word; +typedef int32_t WordI; typedef uint32_t Addr; typedef uint32_t Size; +typedef std::bitset<32> RegMask; + typedef std::bitset<32> ThreadMask; typedef std::bitset<32> WarpMask; diff --git a/simX/util.cpp b/simX/util.cpp index f33ebf34..95628917 100644 --- a/simX/util.cpp +++ b/simX/util.cpp @@ -1,6 +1,9 @@ #include #include #include +#include +#include +#include #include "types.h" #include "util.h" @@ -76,3 +79,102 @@ void vortex::writeWord(std::vector &p, Size &n, Size wordSize, Word w) { w >>= 8; } } + +// Convert 32-bit integer register file to IEEE-754 floating point number. 
+float vortex::intregToFloat(uint32_t input) { + // 31th bit + bool sign = input & 0x80000000; + // Exponent: 23th ~ 30th bits -> 8 bits in total + int32_t exp = ((input & 0x7F800000)>>23); + // printf("exp = %u\n", exp); + // 0th ~ 22th bits -> 23 bits fraction + uint32_t frac = input & 0x007FFFFF; + // Frac_value= 1 + sum{i = 1}{23}{b_{23-i}*2^{-i}} + double frac_value; + if (exp == 0) { // subnormal + if (frac == 0) { + // zero + if (sign) + return -0.0; + else + return 0.0; + } + frac_value = 0.0; + } else + frac_value = 1.0; + + for (int i = 0; i < 23; i++) { + int bi = frac & 0x1; + frac_value += static_cast(bi * pow(2.0, i-23)); + frac = (frac >> 1); + } + + return (float)((static_cast(pow(-1.0, sign))) * (static_cast(pow(2.0, exp - 127.0)))* frac_value); +} + +// Convert a floating point number to IEEE-754 32-bit representation, +// so that it could be stored in a 32-bit integer register file +// Reference: https://www.wikihow.com/Convert-a-Number-from-Decimal-to-IEEE-754-Floating-Point-Representation + // https://www.technical-recipes.com/2012/converting-between-binary-and-decimal-representations-of-ieee-754-floating-point-numbers-in-c/ +uint32_t vortex::floatToBin(float in_value) { + union { + float input; // assumes sizeof(float) == sizeof(int) + int output; + } data; + + data.input = in_value; + + std::bitset bits(data.output); + std::string mystring = bits.to_string, std::allocator >(); + // Convert binary to uint32_t + Word result = stoul(mystring, nullptr, 2); + return result; +} + +// https://en.wikipedia.org/wiki/Single-precision_floating-point_format +// check floating-point number in binary format is NaN +uint8_t vortex::fpBinIsNan(uint32_t din) { + bool fsign = din & 0x80000000; + uint32_t expo = (din>>23) & 0x000000FF; + uint32_t fraction = din & 0x007FFFFF; + uint32_t bit_22 = din & 0x00400000; + + if ((expo==0xFF) && (fraction!=0)) { + // if (!fsign && (fraction == 0x00400000)) + if (!fsign && (bit_22)) + return 1; // quiet NaN, return 1 + 
else + return 2; // signaling NaN, return 2 + } + return 0; +} + +// check floating-point number in binary format is zero +uint8_t vortex::fpBinIsZero(uint32_t din) { + bool fsign = din & 0x80000000; + uint32_t expo = (din>>23) & 0x000000FF; + uint32_t fraction = din & 0x007FFFFF; + + if ((expo==0) && (fraction==0)) { + if (fsign) + return 1; // negative 0 + else + return 2; // positive 0 + } + return 0; // not zero +} + +// check floating-point number in binary format is infinity +uint8_t vortex::fpBinIsInf(uint32_t din) { + bool fsign = din & 0x80000000; + uint32_t expo = (din>>23) & 0x000000FF; + uint32_t fraction = din & 0x007FFFFF; + + if ((expo==0xFF) && (fraction==0)) { + if (fsign) + return 1; // negative infinity + else + return 2; // positive infinity + } + return 0; // not infinity +} \ No newline at end of file diff --git a/simX/util.h b/simX/util.h index 7456fff4..b50db756 100644 --- a/simX/util.h +++ b/simX/util.h @@ -30,4 +30,19 @@ Word readWord(const std::vector &b, Size &n, Size wordSize); void writeByte(std::vector &p, Size &n, Byte b); void writeWord(std::vector &p, Size &n, Size wordSize, Word w); +// Convert 32-bit integer register file to IEEE-754 floating point number. 
+float intregToFloat(uint32_t input); + +// Convert a floating point number to IEEE-754 32-bit representation +uint32_t floatToBin(float in_value); + +// check floating-point number in binary format is NaN +uint8_t fpBinIsNan(uint32_t din); + +// check floating-point number in binary format is zero +uint8_t fpBinIsZero(uint32_t din); + +// check floating-point number in binary format is infinity +uint8_t fpBinIsInf(uint32_t din); + } \ No newline at end of file diff --git a/simX/warp.cpp b/simX/warp.cpp index f09f0036..c5a0c407 100644 --- a/simX/warp.cpp +++ b/simX/warp.cpp @@ -12,53 +12,72 @@ using namespace vortex; Warp::Warp(Core *core, Word id) : id_(id) - , active_(false) - , core_(core) - , PC_(0x80000000) - , steps_(0) - , insts_(0) - , loads_(0) - , stores_(0) { - - tmask_.reset(); - + , core_(core) { iRegFile_.resize(core_->arch().num_threads(), std::vector(core_->arch().num_regs(), 0)); fRegFile_.resize(core_->arch().num_threads(), std::vector(core_->arch().num_regs(), 0)); - vRegFile_.resize(core_->arch().num_regs(), std::vector(core_->arch().vsize(), 0)); + vRegFile_.resize(core_->arch().num_regs(), std::vector(core_->arch().vsize(), 0)); + this->clear(); } -void Warp::step(trace_inst_t *trace_inst) { +void Warp::clear() { + PC_ = STARTUP_ADDR; + tmask_.reset(); + active_ = false; +} + +void Warp::step(Pipeline *pipeline) { assert(tmask_.any()); - Size fetchPos(0); - Size decPos; - Size wordSize(core_->arch().wsize()); - std::vector fetchBuffer(wordSize); + D(3, "Step: wid=" << id_ << ", PC=0x" << std::hex << PC_); - ++steps_; + /* Fetch and decode. 
*/ - D(3, "current PC=0x" << std::hex << PC_); + Word fetched = core_->icache_fetch(PC_, 0); + auto instr = core_->decoder().decode(fetched); - // std::cout << "PC: " << std::hex << PC << "\n"; - trace_inst->PC = PC_; + // Update pipeline + pipeline->valid = true; + pipeline->PC = PC_; + pipeline->rdest = instr->getRDest(); + pipeline->rdest_type = instr->getRDType(); + pipeline->used_iregs.reset(); + pipeline->used_fregs.reset(); + pipeline->used_vregs.reset(); - /* Fetch and decode. */ - if (wordSize < sizeof(PC_)) - PC_ &= ((1ll << (wordSize * 8)) - 1); - - unsigned fetchSize = 4; - fetchBuffer.resize(fetchSize); - Word fetched = core_->icache_fetch(PC_ + fetchPos, 0); - writeWord(fetchBuffer, fetchPos, fetchSize, fetched); - - decPos = 0; - std::shared_ptr instr = core_->decoder().decode(fetchBuffer, decPos, trace_inst); - - // Update PC - PC_ += decPos; + switch (pipeline->rdest_type) { + case 1: + pipeline->used_iregs[pipeline->rdest] = 1; + break; + case 2: + pipeline->used_fregs[pipeline->rdest] = 1; + break; + case 3: + pipeline->used_vregs[pipeline->rdest] = 1; + break; + default: + break; + } + for (int i = 0; i < instr->getNRSrc(); ++i) { + int type = instr->getRSType(i); + int reg = instr->getRSrc(i); + switch (type) { + case 1: + pipeline->used_iregs[reg] = 1; + break; + case 2: + pipeline->used_fregs[reg] = 1; + break; + case 3: + pipeline->used_vregs[reg] = 1; + break; + default: + break; + } + } + // Execute - this->execute(*instr, trace_inst); + this->execute(*instr, pipeline); // At Debug Level 3, print debug info after each instruction. 
D(4, "Register state:"); @@ -74,11 +93,4 @@ void Warp::step(trace_inst_t *trace_inst) { for (int i = 0; i < core_->arch().num_threads(); ++i) DPN(3, " " << tmask_[i]); DPN(3, "\n"); -} - -void Warp::printStats() const { - std::cout << "Steps : " << steps_ << std::endl - << "Insts : " << insts_ << std::endl - << "Loads : " << loads_ << std::endl - << "Stores: " << stores_ << std::endl; } \ No newline at end of file diff --git a/simX/warp.h b/simX/warp.h index 5ed84a14..da91f78d 100644 --- a/simX/warp.h +++ b/simX/warp.h @@ -7,6 +7,9 @@ namespace vortex { +class Core; +class Instr; +class Pipeline; struct DomStackEntry { DomStackEntry(const ThreadMask &tmask, Word PC) : tmask(tmask) @@ -34,14 +37,11 @@ struct vtype { int vsew; int vlmul; }; - -class Core; -class Instr; -class trace_inst_t; - class Warp { public: - Warp(Core *core, Word id = 0); + Warp(Core *core, Word id); + + void clear(); bool active() const { return active_; @@ -57,12 +57,6 @@ public: return 0; } - void printStats() const; - - Core *core() { - return core_; - } - Word id() const { return id_; } @@ -80,11 +74,15 @@ public: active_ = tmask_.any(); } - void step(trace_inst_t *); + Word getIRegValue(int reg) const { + return iRegFile_[0][reg]; + } + + void step(Pipeline *); private: - void execute(Instr &instr, trace_inst_t *); + void execute(const Instr &instr, Pipeline *); Word id_; bool active_; @@ -100,11 +98,6 @@ private: struct vtype vtype_; int vl_; - - unsigned long steps_; - unsigned long insts_; - unsigned long loads_; - unsigned long stores_; }; }