diff --git a/sim/simx/Makefile b/sim/simx/Makefile new file mode 100644 index 00000000..0feba083 --- /dev/null +++ b/sim/simx/Makefile @@ -0,0 +1,50 @@ +RTL_DIR = ../hw/rtl + +CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Werror -g +CXXFLAGS += -fPIC -Wno-maybe-uninitialized +CXXFLAGS += -I. -I../common -I../../hw +CXXFLAGS += -I../common/softfloat/source/include +CXXFLAGS += $(CONFIGS) + +LDFLAGS += ../common/softfloat/build/Linux-x86_64-GCC/softfloat.a + +TOP = vx_cache_sim + +SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp +SRCS += args.cpp pipeline.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp + +OBJS := $(patsubst %.cpp, obj_dir/%.o, $(notdir $(SRCS))) +VPATH := $(sort $(dir $(SRCS))) + +#$(info OBJS is $(OBJS)) +#$(info VPATH is $(VPATH)) + +# Debugigng +ifdef DEBUG + CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) +else + CXXFLAGS += -O2 -DNDEBUG +endif + +PROJECT = simX + +all: $(PROJECT) + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +obj_dir/%.o: %.cpp + mkdir -p obj_dir + $(CXX) $(CXXFLAGS) -c $< -o $@ + +static: $(OBJS) + $(AR) rcs lib$(PROJECT).a $(OBJS) ../common/softfloat/build/Linux-x86_64-GCC/*.o + +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; + +clean-static: + rm -rf lib$(PROJECT).a obj_dir .depend + +clean: clean-static + rm -rf $(PROJECT) \ No newline at end of file diff --git a/sim/simx/archdef.h b/sim/simx/archdef.h new file mode 100644 index 00000000..73e28a15 --- /dev/null +++ b/sim/simx/archdef.h @@ -0,0 +1,73 @@ +#pragma once + +#include +#include + +#include +#include +#include "types.h" + +namespace vortex { + +class ArchDef { +public: + ArchDef(const std::string &/*arch*/, + int num_cores, + int num_warps, + int num_threads) { + // simx64 + wsize_ = 8; + vsize_ = 16; + num_regs_ = 32; + num_csrs_ = 4096; + num_barriers_= NUM_BARRIERS; + num_cores_ = num_cores; + num_warps_ = num_warps; + num_threads_ = num_threads; + } + + int wsize() const { + return wsize_; + } + + int vsize() 
const { + return vsize_; + } + + int num_regs() const { + return num_regs_; + } + + int num_csrs() const { + return num_csrs_; + } + + int num_barriers() const { + return num_barriers_; + } + + int num_threads() const { + return num_threads_; + } + + int num_warps() const { + return num_warps_; + } + + int num_cores() const { + return num_cores_; + } + +private: + + int wsize_; + int vsize_; + int num_regs_; + int num_csrs_; + int num_barriers_; + int num_threads_; + int num_warps_; + int num_cores_; +}; + +} \ No newline at end of file diff --git a/sim/simx/args.cpp b/sim/simx/args.cpp new file mode 100644 index 00000000..2cd847b2 --- /dev/null +++ b/sim/simx/args.cpp @@ -0,0 +1,47 @@ +#include +#include +#include "args.h" + +using namespace vortex; +using std::string; + +std::string CommandLineArg::helpString_; +std::unordered_map CommandLineArg::longArgs_; +std::unordered_map CommandLineArg::shortArgs_; + +CommandLineArg::CommandLineArg(string s, string l, const char *helpText) { + helpString_ += helpText; + longArgs_[l] = this; + shortArgs_[s] = this; +} + +CommandLineArg::CommandLineArg(string l, const char *helpText) { + helpString_ += helpText; + longArgs_[l] = this; +} + +void CommandLineArg::readArgs(int argc, char **argv) { + for (int i = 0; i < argc; i++) { + std::unordered_map::iterator + s = shortArgs_.find(std::string(argv[i])), + l = longArgs_.find(std::string(argv[i])); + + if (s != shortArgs_.end()) { + i += s->second->read(argc - i, &argv[i]); + } else if (l != longArgs_.end()) { + i += l->second->read(argc - i, &argv[i]); + } else { + throw BadArg(string(argv[i])); + } + } +} + +void CommandLineArg::clearArgs() { + shortArgs_.clear(); + longArgs_.clear(); + helpString_ = ""; +} + +void CommandLineArg::showHelp(std::ostream &os) { + os << helpString_; +} diff --git a/sim/simx/args.h b/sim/simx/args.h new file mode 100644 index 00000000..aeaba4e5 --- /dev/null +++ b/sim/simx/args.h @@ -0,0 +1,64 @@ +#pragma once + +#include +#include +#include 
+#include
+#include
+
+namespace vortex {
+
+struct BadArg { BadArg(std::string s) : arg(s) {} std::string arg; };
+
+class CommandLineArg {
+public:
+  CommandLineArg(std::string s, std::string l, const char *helpText);
+  CommandLineArg(std::string l, const char *helpText);
+  virtual int read(int argc, char** argv) = 0;
+
+  static void readArgs(int argc, char **argv);
+  static void clearArgs();
+  static void showHelp(std::ostream &os);
+
+private:
+  static std::string helpString_;
+  static std::unordered_map longArgs_;
+  static std::unordered_map shortArgs_;
+};
+
+template class CommandLineArgSetter : public CommandLineArg {
+public:
+  CommandLineArgSetter(std::string s, std::string l, const char *ht, T &x) :
+    CommandLineArg(s, l, ht), arg_(x) {}
+
+  CommandLineArgSetter(std::string l, const char *ht, T &x) :
+    CommandLineArg(l, ht), arg_(x) {}
+
+  int read(int argc, char **argv) {
+    if (argc < 2) throw BadArg(std::string(argv[0])); // argv[0] is the option itself; without a following token there is no value to parse
+    std::istringstream iss(argv[1]);
+    iss >> arg_;
+    return 1;
+  }
+private:
+  T &arg_;
+};
+
+class CommandLineArgFlag : public CommandLineArg {
+public:
+  CommandLineArgFlag(std::string s, std::string l, const char *ht, bool &x) :
+    CommandLineArg(s, l, ht), arg_(x) { arg_ = false; }
+
+  CommandLineArgFlag(std::string l, const char *ht, bool &x) :
+    CommandLineArg(l, ht), arg_(x) { arg_ = false; }
+
+  int read(int argc, char **argv) {
+    __unused(argc, argv);
+    arg_ = true;
+    return 0;
+  }
+private:
+  bool &arg_;
+};
+
+}
\ No newline at end of file
diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp
new file mode 100644
index 00000000..688fd678
--- /dev/null
+++ b/sim/simx/core.cpp
@@ -0,0 +1,397 @@
+#include
+#include
+#include
+#include
+#include
+#include "types.h"
+#include "archdef.h"
+#include "mem.h"
+#include "decode.h"
+#include "core.h"
+#include "debug.h"
+
+using namespace vortex;
+
+Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
+    : id_(id)
+    , arch_(arch)
+    , decoder_(decoder)
+    , mem_(mem)
+#ifdef SM_ENABLE // shared_mem_ is only declared in core.h when SM_ENABLE is defined
+    , shared_mem_(1, SMEM_SIZE)
+#endif
+    ,
inst_in_schedule_("schedule") + , inst_in_fetch_("fetch") + , inst_in_decode_("decode") + , inst_in_issue_("issue") + , inst_in_execute_("execute") + , inst_in_writeback_("writeback") { + in_use_iregs_.resize(arch.num_warps(), 0); + in_use_fregs_.resize(arch.num_warps(), 0); + in_use_vregs_.reset(); + + csrs_.resize(arch_.num_csrs(), 0); + + fcsrs_.resize(arch_.num_warps(), 0); + + barriers_.resize(arch_.num_barriers(), 0); + + warps_.resize(arch_.num_warps()); + for (int i = 0; i < arch_.num_warps(); ++i) { + warps_[i] = std::make_shared(this, i); + } + + this->clear(); +} + +Core::~Core() { + for (auto& buf : print_bufs_) { + auto str = buf.second.str(); + if (!str.empty()) { + std::cout << "#" << buf.first << ": " << str << std::endl; + } + } +} + +void Core::clear() { + for (int w = 0; w < arch_.num_warps(); ++w) { + in_use_iregs_[w].reset(); + in_use_fregs_[w].reset(); + } + stalled_warps_.reset(); + + in_use_vregs_.reset(); + + for (auto& csr : csrs_) { + csr = 0; + } + + for (auto& fcsr : fcsrs_) { + fcsr = 0; + } + + for (auto& barrier : barriers_) { + barrier.reset(); + } + + for (auto warp : warps_) { + warp->clear(); + } + + inst_in_schedule_.clear(); + inst_in_fetch_.clear(); + inst_in_decode_.clear(); + inst_in_issue_.clear(); + inst_in_execute_.clear(); + inst_in_writeback_.clear(); + print_bufs_.clear(); + + steps_ = 0; + insts_ = 0; + loads_ = 0; + stores_ = 0; + + inst_in_schedule_.valid = true; + warps_[0]->setTmask(0, true); + + ebreak_ = false; +} + +void Core::step() { + D(2, "###########################################################"); + + steps_++; + D(2, std::dec << "Core" << id_ << ": cycle: " << steps_); + + this->writeback(); + this->execute(); + this->issue(); + this->decode(); + this->fetch(); + this->schedule(); + + DPN(2, std::flush); +} + +void Core::schedule() { + if (!inst_in_schedule_.enter(&inst_in_fetch_)) + return; + + bool foundSchedule = false; + int scheduled_warp = inst_in_schedule_.wid; + + for (size_t wid = 0; wid < 
warps_.size(); ++wid) { + // round robin scheduling + scheduled_warp = (scheduled_warp + 1) % warps_.size(); + bool is_active = warps_[scheduled_warp]->active(); + bool stalled = stalled_warps_[scheduled_warp]; + if (is_active && !stalled) { + foundSchedule = true; + break; + } + } + + if (!foundSchedule) + return; + + D(2, "Schedule: wid=" << scheduled_warp); + inst_in_schedule_.wid = scheduled_warp; + + // advance pipeline + inst_in_schedule_.next(&inst_in_fetch_); +} + +void Core::fetch() { + if (!inst_in_fetch_.enter(&inst_in_issue_)) + return; + + int wid = inst_in_fetch_.wid; + + auto active_threads_b = warps_[wid]->getActiveThreads(); + warps_[wid]->step(&inst_in_fetch_); + auto active_threads_a = warps_[wid]->getActiveThreads(); + + insts_ += active_threads_b; + if (active_threads_b != active_threads_a) { + D(3, "*** warp#" << wid << " active threads changed to " << active_threads_a); + } + + if (inst_in_fetch_.stall_warp) { + D(3, "*** warp#" << wid << " fetch stalled"); + stalled_warps_[wid] = true; + } + + D(4, inst_in_fetch_); + + // advance pipeline + inst_in_fetch_.next(&inst_in_issue_); +} + +void Core::decode() { + if (!inst_in_decode_.enter(&inst_in_issue_)) + return; + + // advance pipeline + inst_in_decode_.next(&inst_in_issue_); +} + +void Core::issue() { + if (!inst_in_issue_.enter(&inst_in_execute_)) + return; + + bool in_use_regs = (inst_in_issue_.used_iregs & in_use_iregs_[inst_in_issue_.wid]) != 0 + || (inst_in_issue_.used_fregs & in_use_fregs_[inst_in_issue_.wid]) != 0 + || (inst_in_issue_.used_vregs & in_use_vregs_) != 0; + + if (in_use_regs) { + D(3, "*** Issue: registers not ready!"); + inst_in_issue_.stalled = true; + return; + } + + switch (inst_in_issue_.rdest_type) { + case 1: + if (inst_in_issue_.rdest) + in_use_iregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1; + break; + case 2: + in_use_fregs_[inst_in_issue_.wid][inst_in_issue_.rdest] = 1; + break; + case 3: + in_use_vregs_[inst_in_issue_.rdest] = 1; + break; + default: + 
break;
+  }
+
+  // advance pipeline
+  inst_in_issue_.next(&inst_in_execute_);
+}
+
+void Core::execute() {
+  if (!inst_in_execute_.enter(&inst_in_writeback_))
+    return;
+
+  // advance pipeline
+  inst_in_execute_.next(&inst_in_writeback_);
+}
+
+void Core::writeback() {
+  if (!inst_in_writeback_.enter(nullptr))
+    return;
+
+  switch (inst_in_writeback_.rdest_type) {
+  case 1:
+    in_use_iregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
+    break;
+  case 2:
+    in_use_fregs_[inst_in_writeback_.wid][inst_in_writeback_.rdest] = 0;
+    break;
+  case 3:
+    in_use_vregs_[inst_in_writeback_.rdest] = 0;
+    break;
+  default:
+    break;
+  }
+
+  if (inst_in_writeback_.stall_warp) {
+    stalled_warps_[inst_in_writeback_.wid] = false;
+    D(3, "*** warp#" << inst_in_writeback_.wid << " fetch released");
+  }
+
+  // advance pipeline (writeback is the last stage, so there is no next stage to hand off to)
+  inst_in_writeback_.next(nullptr);
+}
+
+DoubleWord Core::get_csr(Addr addr, int tid, int wid) {
+  if (addr == CSR_FFLAGS) {
+    return fcsrs_.at(wid) & 0x1F;
+  } else if (addr == CSR_FRM) {
+    return (fcsrs_.at(wid) >> 5);
+  } else if (addr == CSR_FCSR) {
+    return fcsrs_.at(wid);
+  } else if (addr == CSR_WTID) {
+    // Warp threadID
+    return tid;
+  } else if (addr == CSR_LTID) {
+    // Core threadID
+    return tid + (wid * arch_.num_threads());
+  } else if (addr == CSR_GTID) {
+    // Processor threadID
+    return tid + (wid * arch_.num_threads())
+            + (arch_.num_threads() * arch_.num_warps() * id_);
+  } else if (addr == CSR_LWID) {
+    // Core warpID
+    return wid;
+  } else if (addr == CSR_GWID) {
+    // Processor warpID
+    return wid + (arch_.num_warps() * id_);
+  } else if (addr == CSR_GCID) {
+    // Processor coreID
+    return id_;
+  } else if (addr == CSR_TMASK) {
+    // Thread mask of warp `wid`
+    return warps_.at(wid)->getTmask();
+  } else if (addr == CSR_NT) {
+    // Number of threads per warp
+    return arch_.num_threads();
+  } else if (addr == CSR_NW) {
+    // Number of warps per core
+    return arch_.num_warps();
+  } else if (addr == CSR_NC) {
+    // Number of cores
+    return arch_.num_cores();
+
} else if (addr == CSR_MINSTRET) {
+    // NumInsts
+    return insts_;
+  } else if (addr == CSR_MINSTRET_H) {
+    // NumInsts
+    return (DoubleWord)(insts_ >> 32);
+  } else if (addr == CSR_MCYCLE) {
+    // NumCycles
+    return (DoubleWord)steps_;
+  } else if (addr == CSR_MCYCLE_H) {
+    // NumCycles
+    return (DoubleWord)(steps_ >> 32);
+  } else {
+    return csrs_.at(addr);
+  }
+}
+
+void Core::set_csr(Addr addr, DoubleWord value, int /*tid*/, int wid) {
+  if (addr == CSR_FFLAGS) {
+    fcsrs_.at(wid) = (fcsrs_.at(wid) & ~0x1F) | (value & 0x1F);
+  } else if (addr == CSR_FRM) {
+    fcsrs_.at(wid) = (fcsrs_.at(wid) & ~0xE0) | ((value & 0x7) << 5); // frm occupies fcsr[7:5] only; drop any higher bits of value
+  } else if (addr == CSR_FCSR) {
+    fcsrs_.at(wid) = value & 0xff;
+  } else {
+    csrs_.at(addr) = value;
+  }
+}
+
+void Core::barrier(int bar_id, int count, int warp_id) {
+  auto& barrier = barriers_.at(bar_id);
+  barrier.set(warp_id);
+  if (barrier.count() < (size_t)count)
+    return;
+  for (int i = 0; i < arch_.num_warps(); ++i) {
+    if (barrier.test(i)) {
+      warps_.at(i)->activate();
+    }
+  }
+  barrier.reset();
+}
+
+// simx64
+Word Core::icache_fetch(Addr addr) {
+  Word data;
+  mem_.read(&data, addr, sizeof(Word), 0);
+  return data;
+}
+
+// simx64
+DoubleWord Core::dcache_read(Addr addr, Size size) {
+  ++loads_;
+  DoubleWord data = 0;
+#ifdef SM_ENABLE
+  if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
+   && ((addr + size - 1) < SMEM_BASE_ADDR)) { // last byte touched must lie inside the window, whatever the access size
+     shared_mem_.read(&data, addr & (SMEM_SIZE-1), size);
+     return data;
+  }
+#endif
+  mem_.read(&data, addr, size, 0);
+  return data;
+}
+
+void Core::dcache_write(Addr addr, DoubleWord data, Size size) {
+  ++stores_;
+#ifdef SM_ENABLE
+  if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
+   && ((addr + size - 1) < SMEM_BASE_ADDR)) { // same size-aware bound as dcache_read
+     shared_mem_.write(&data, addr & (SMEM_SIZE-1), size);
+     return;
+  }
+#endif
+  if (addr >= IO_COUT_ADDR
+   && addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
+     this->writeToStdOut(addr, data);
+     return;
+  }
+  mem_.write(&data, addr, size, 0);
+}
+
+bool Core::running() const {
+  return inst_in_fetch_.valid
+      ||
inst_in_decode_.valid + || inst_in_issue_.valid + || inst_in_execute_.valid + || inst_in_writeback_.valid; +} + +void Core::printStats() const { + std::cout << "Steps : " << steps_ << std::endl + << "Insts : " << insts_ << std::endl + << "Loads : " << loads_ << std::endl + << "Stores: " << stores_ << std::endl; +} + +void Core::writeToStdOut(Addr addr, DoubleWord data) { + uint32_t tid = (addr - IO_COUT_ADDR) & (IO_COUT_SIZE-1); + auto& ss_buf = print_bufs_[tid]; + char c = (char)data; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << tid << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } +} + +void Core::trigger_ebreak() { + ebreak_ = true; +} + +bool Core::check_ebreak() const { + return ebreak_; +} \ No newline at end of file diff --git a/sim/simx/core.h b/sim/simx/core.h new file mode 100644 index 00000000..7bfb5c41 --- /dev/null +++ b/sim/simx/core.h @@ -0,0 +1,123 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "types.h" +#include "archdef.h" +#include "decode.h" +#include "mem.h" +#include "warp.h" +#include "pipeline.h" + +namespace vortex { + +class Core { +public: + Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id); + + ~Core(); + + void clear(); + + bool running() const; + + void step(); + + void printStats() const; + + Word id() const { + return id_; + } + + Warp& warp(int i) { + return *warps_.at(i); + } + + Decoder& decoder() { + return decoder_; + } + + const ArchDef& arch() const { + return arch_; + } + + unsigned long num_insts() const { + return insts_; + } + + unsigned long num_steps() const { + return steps_; + } + + Word getIRegValue(int reg) const { + return warps_[0]->getIRegValue(reg); + } + + DoubleWord get_csr(Addr addr, int tid, int wid); + + void set_csr(Addr addr, DoubleWord value, int tid, int wid); + + void barrier(int bar_id, int count, int warp_id); + + // simx64 + Word icache_fetch(Addr); + // simx64 + DoubleWord 
dcache_read(Addr, Size); + // simx64 + void dcache_write(Addr, DoubleWord, Size); + + void trigger_ebreak(); + bool check_ebreak() const; + +private: + + void schedule(); + void fetch(); + void decode(); + void issue(); + void execute(); + void writeback(); + + void writeToStdOut(Addr addr, DoubleWord data); + + std::vector in_use_iregs_; + std::vector in_use_fregs_; + RegMask in_use_vregs_; + WarpMask stalled_warps_; + std::vector> warps_; + std::vector barriers_; + std::vector csrs_; + std::vector fcsrs_; + std::unordered_map print_bufs_; + + Word id_; + const ArchDef &arch_; + Decoder &decoder_; + MemoryUnit &mem_; +#ifdef SM_ENABLE + RAM shared_mem_; +#endif + + bool ebreak_; + + Pipeline inst_in_schedule_; + Pipeline inst_in_fetch_; + Pipeline inst_in_decode_; + Pipeline inst_in_issue_; + Pipeline inst_in_execute_; + Pipeline inst_in_writeback_; + + uint64_t steps_; + uint64_t insts_; + uint64_t loads_; + uint64_t stores_; +}; + +} // namespace vortex \ No newline at end of file diff --git a/sim/simx/debug.h b/sim/simx/debug.h new file mode 100644 index 00000000..94a31804 --- /dev/null +++ b/sim/simx/debug.h @@ -0,0 +1,43 @@ +#pragma once + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 4 +#endif + +#define DEBUG_HEADER << "DEBUG " +//#define DEBUG_HEADER << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " + +#ifndef NDEBUG + +#include +#include + +#define DX(x) x + +#define D(lvl, x) do { \ + if ((lvl) <= DEBUG_LEVEL) { \ + std::cout DEBUG_HEADER << x << std::endl; \ + } \ +} while(0) + +#define DPH(lvl, x) do { \ + if ((lvl) <= DEBUG_LEVEL) { \ + std::cout DEBUG_HEADER << x; \ + } \ +} while(0) + +#define DPN(lvl, x) do { \ + if ((lvl) <= DEBUG_LEVEL) { \ + std::cout << x; \ + } \ +} while(0) + +#else + +#define DX(x) +#define D(lvl, x) do {} while(0) +#define DPH(lvl, x) do {} while(0) +#define DPN(lvl, x) do {} while(0) +#define D_RAW(x) do {} while(0) + +#endif \ No newline at end of file diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp new 
file mode 100644 index 00000000..854f0935 --- /dev/null +++ b/sim/simx/decode.cpp @@ -0,0 +1,600 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "debug.h" +#include "types.h" +#include "decode.h" +#include "archdef.h" +#include "instr.h" + +using namespace vortex; + +struct InstTableEntry_t { + bool controlFlow; + InstType iType; +}; + +static const std::unordered_map sc_instTable = { + {Opcode::NOP, {false, InstType::N_TYPE}}, + {Opcode::R_INST, {false, InstType::R_TYPE}}, + {Opcode::L_INST, {false, InstType::I_TYPE}}, + {Opcode::I_INST, {false, InstType::I_TYPE}}, + {Opcode::S_INST, {false, InstType::S_TYPE}}, + {Opcode::B_INST, {true , InstType::B_TYPE}}, + {Opcode::LUI_INST, {false, InstType::U_TYPE}}, + {Opcode::AUIPC_INST, {false, InstType::U_TYPE}}, + {Opcode::JAL_INST, {true , InstType::J_TYPE}}, + {Opcode::JALR_INST, {true , InstType::I_TYPE}}, + {Opcode::SYS_INST, {true , InstType::I_TYPE}}, + {Opcode::FENCE, {true , InstType::I_TYPE}}, + {Opcode::FL, {false, InstType::I_TYPE}}, + {Opcode::FS, {false, InstType::S_TYPE}}, + {Opcode::FCI, {false, InstType::R_TYPE}}, + {Opcode::FMADD, {false, InstType::R4_TYPE}}, + {Opcode::FMSUB, {false, InstType::R4_TYPE}}, + {Opcode::FMNMADD, {false, InstType::R4_TYPE}}, + {Opcode::FMNMSUB, {false, InstType::R4_TYPE}}, + {Opcode::VSET, {false, InstType::V_TYPE}}, + {Opcode::GPGPU, {false, InstType::R_TYPE}}, + {Opcode::R_INST_64, {false, InstType::R_TYPE}}, + {Opcode::I_INST_64, {false, InstType::I_TYPE}}, +}; + +static const char* op_string(const Instr &instr) { + // simx64 + Word func2 = instr.getFunc2(); + Word func3 = instr.getFunc3(); + Word func7 = instr.getFunc7(); + Word rs2 = instr.getRSrc(1); + DoubleWord imm = instr.getImm(); + switch (instr.getOpcode()) { + case Opcode::NOP: return "NOP"; + case Opcode::LUI_INST: return "LUI"; + case Opcode::AUIPC_INST: return "AUIPC"; + case Opcode::R_INST: + if (func7 & 0x1) { + switch (func3) { + case 0: return "MUL"; + case 1: 
return "MULH";
+      case 2: return "MULHSU";
+      case 3: return "MULHU";
+      case 4: return "DIV";
+      case 5: return "DIVU";
+      case 6: return "REM";
+      case 7: return "REMU";
+      }
+    } else {
+      switch (func3) {
+      case 0: return func7 ? "SUB" : "ADD";
+      case 1: return "SLL";
+      case 2: return "SLT";
+      case 3: return "SLTU";
+      case 4: return "XOR";
+      case 5: return func7 ? "SRA" : "SRL";
+      case 6: return "OR";
+      case 7: return "AND";
+      }
+    }
+  case Opcode::I_INST:
+    switch (func3) {
+    case 0: return "ADDI";
+    case 1: return "SLLI";
+    case 2: return "SLTI";
+    case 3: return "SLTIU";
+    case 4: return "XORI";
+    case 5: return (func7 & 0x20) ? "SRAI" : "SRLI"; // only instruction bit 30 selects SRAI; func7 bit 0 is shamt[5] on RV64
+    case 6: return "ORI";
+    case 7: return "ANDI";
+    }
+  case Opcode::B_INST:
+    switch (func3) {
+    case 0: return "BEQ";
+    case 1: return "BNE";
+    case 4: return "BLT";
+    case 5: return "BGE";
+    case 6: return "BLTU";
+    case 7: return "BGEU";
+    default:
+      std::abort();
+    }
+  case Opcode::JAL_INST: return "JAL";
+  case Opcode::JALR_INST: return "JALR";
+  case Opcode::L_INST:
+    switch (func3) {
+    case 0: return "LBI";
+    case 1: return "LHI";
+    case 2: return "LW";
+    // simx64
+    case 3: return "LD";
+    case 4: return "LBU";
+    case 5: return "LHU";
+    // simx64
+    case 6: return "LWU";
+    default:
+      std::abort();
+    }
+  case Opcode::S_INST:
+    switch (func3) {
+    case 0: return "SB";
+    case 1: return "SH";
+    case 2: return "SW";
+    // simx64
+    case 3: return "SD";
+    default:
+      std::abort();
+    }
+  // simx64
+  case Opcode::R_INST_64:
+    if (func7 & 0x1) { // func7 bit 0 marks the M-extension word ops, mirroring the R_INST case
+      switch (func3) {
+      case 0: return "MULW";
+      case 4: return "DIVW";
+      case 5: return "DIVUW";
+      case 6: return "REMW";
+      case 7: return "REMUW";
+      default:
+        std::abort();
+      }
+    } else {
+      switch (func3) {
+      case 0: return func7 ? "SUBW" : "ADDW";
+      case 1: return "SLLW";
+      case 5: return func7 ? "SRAW" : "SRLW";
+      default:
+        std::abort();
+      }
+    }
+  // simx64
+  case Opcode::I_INST_64:
+    switch (func3) {
+    case 0: return "ADDIW";
+    case 1: return "SLLIW";
+    case 5: return func7 ?
"SRAIW" : "SRLIW"; + default: + std::abort(); + } + case Opcode::SYS_INST: + switch (func3) { + case 0: return imm ? "EBREAK" : "ECALL"; + case 1: return "CSRRW"; + case 2: return "CSRRS"; + case 3: return "CSRRC"; + case 5: return "CSRRWI"; + case 6: return "CSRRSI"; + case 7: return "CSRRCI"; + default: + std::abort(); + } + case Opcode::FENCE: return "FENCE"; + // simx64 + case Opcode::FL: + switch (func3) { + case 0x1: return "VL"; + case 0x2: return "FLW"; + case 0x3: return "FLD"; + default: + std::abort(); + } + case Opcode::FS: + switch (func3) { + case 0x1: return "VS"; + case 0x2: return "FSW"; + case 0x3: return "FSD"; + default: + std::abort(); + } + case Opcode::FCI: + switch (func7) { + case 0x00: return "FADD.S"; + case 0x01: return "FADD.D"; + case 0x04: return "FSUB.S"; + case 0x05: return "FSUB.D"; + case 0x08: return "FMUL.S"; + case 0x09: return "FMUL.D"; + case 0x0c: return "FDIV.S"; + case 0x0d: return "FDIV.D"; + case 0x2c: return "FSQRT.S"; + case 0x2d: return "FSQRT.D"; + case 0x10: + switch (func3) { + case 0: return "FSGNJ.S"; + case 1: return "FSGNJN.S"; + case 2: return "FSGNJX.S"; + default: + std::abort(); + } + case 0x11: + switch (func3) { + case 0: return "FSGNJ.D"; + case 1: return "FSGNJN.D"; + case 2: return "FSGNJX.D"; + default: + std::abort(); + } + case 0x14: + switch (func3) { + case 0: return "FMIN.S"; + case 1: return "FMAX.S"; + default: + std::abort(); + } + case 0x15: + switch (func3) { + case 0: return "FMIN.D"; + case 1: return "FMAX.D"; + default: + std::abort(); + } + case 0x20: return "FCVT.S.D"; + case 0x21: return "FCVT.D.S"; + case 0x50: + switch (func3) { + case 0: return "FLE.S"; + case 1: return "FLT.S"; + case 2: return "FEQ.S"; + default: + std::abort(); + } + case 0x51: + switch (func3) { + case 0: return "FLE.D"; + case 1: return "FLT.D"; + case 2: return "FEQ.D"; + default: + std::abort(); + } + // simx64 + case 0x60: + switch (rs2) { + case 0: return "FCVT.W.S"; + case 1: return "FCVT.WU.S"; + case 2: 
return "FCVT.L.S"; + case 3: return "FCVT.LU.S"; + default: + std::abort(); + } + case 0x61: + switch (rs2) { + case 0: return "FCVT.W.D"; + case 1: return "FCVT.WU.D"; + case 2: return "FCVT.L.D"; + case 3: return "FCVT.LU.D"; + default: + std::abort(); + } + case 0x68: + switch (rs2) { + case 0: return "FCVT.S.W"; + case 1: return "FCVT.S.WU"; + case 2: return "FCVT.S.L"; + case 3: return "FCVT.S.LU"; + default: + std::abort(); + } + case 0x69: + switch (rs2) { + case 0: return "FCVT.D.W"; + case 1: return "FCVT.D.WU"; + case 2: return "FCVT.D.L"; + case 3: return "FCVT.D.LU"; + default: + std::abort(); + } + case 0x70: return func3 ? "FCLASS.S" : "FMV.X.W"; + case 0x71: return func3 ? "FCLASS.D" : "FMV.X.D"; + case 0x78: return "FMV.W.X"; + case 0x79: return "FMV.D.X"; + default: + std::abort(); + } + case Opcode::FMADD: return func2 ? "FMADD.D" : "FMADD.S"; + case Opcode::FMSUB: return func2 ? "FMSUB.D" : "FMSUB.S"; + case Opcode::FMNMADD: return func2 ? "FNMADD.D" : "FNMADD.S"; + case Opcode::FMNMSUB: return func2 ? 
"FNMSUB.D" : "FNMSUB.S"; + case Opcode::VSET: return "VSET"; + case Opcode::GPGPU: + switch (func3) { + case 0: return "TMC"; + case 1: return "WSPAWN"; + case 2: return "SPLIT"; + case 3: return "JOIN"; + case 4: return "BAR"; + case 6: return "PREFETCH"; + default: + std::abort(); + } + default: + std::abort(); + } +} + +namespace vortex { +std::ostream &operator<<(std::ostream &os, const Instr &instr) { + os << op_string(instr) << ": "; + auto opcode = instr.getOpcode(); + + auto rd_to_string = [&]() { + int rdt = instr.getRDType(); + int rd = instr.getRDest(); + switch (rdt) { + case 1: os << "r" << std::dec << rd << " <- "; break; + case 2: os << "fr" << std::dec << rd << " <- "; break; + case 3: os << "vr" << std::dec << rd << " <- "; break; + default: break; + } + }; + + auto rs_to_string = [&](int i) { + int rst = instr.getRSType(i); + int rs = instr.getRSrc(i); + switch (rst) { + case 1: os << "r" << std::dec << rs; break; + case 2: os << "fr" << std::dec << rs; break; + case 3: os << "vr" << std::dec << rs; break; + default: break; + } + }; + + if (opcode == S_INST + || opcode == FS + || opcode == VS) { + os << "M[r" << std::dec << instr.getRSrc(0) << " + 0x" << std::hex << instr.getImm() << "] <- "; + rs_to_string(1); + } else + if (opcode == L_INST + || opcode == FL + || opcode == VL) { + rd_to_string(); + os << "M[r" << std::dec << instr.getRSrc(0) << " + 0x" << std::hex << instr.getImm() << "]"; + } else { + rd_to_string(); + int i = 0; + for (; i < instr.getNRSrc(); ++i) { + if (i) os << ", "; + rs_to_string(i); + } + if (instr.hasImm()) { + if (i) os << ", "; + os << "imm=0x" << std::hex << instr.getImm(); + } + } + + return os; +} +} + +Decoder::Decoder(const ArchDef &arch) { + // simx64 + inst_s_ = arch.wsize() * 4; + opcode_s_ = 7; + reg_s_ = 5; + func2_s_ = 2; + func3_s_ = 3; + mop_s_ = 3; + vmask_s_ = 1; + + shift_opcode_ = 0; + shift_rd_ = opcode_s_; + shift_func3_ = shift_rd_ + reg_s_; + shift_rs1_ = shift_func3_ + func3_s_; + shift_rs2_ = 
shift_rs1_ + reg_s_; + shift_func7_ = shift_rs2_ + reg_s_; + shift_rs3_ = shift_func7_ + func2_s_; + shift_vmop_ = shift_func7_ + vmask_s_; + shift_vnf_ = shift_vmop_ + mop_s_; + shift_func6_ = shift_func7_ + 1; + shift_vset_ = shift_func7_ + 6; + + reg_mask_ = 0x1f; + // simx64 + func2_mask_ = 0x3; + func3_mask_ = 0x7; + func6_mask_ = 0x3f; + func7_mask_ = 0x7f; + opcode_mask_ = 0x7f; + i_imm_mask_ = 0xfff; + s_imm_mask_ = 0xfff; + b_imm_mask_ = 0x1fff; + u_imm_mask_ = 0xfffff; + j_imm_mask_ = 0xfffff; + v_imm_mask_ = 0x7ff; +} + +// simx64 +std::shared_ptr Decoder::decode(Word code, Word PC) { + auto instr = std::make_shared(); + Opcode op = (Opcode)((code >> shift_opcode_) & opcode_mask_); + instr->setOpcode(op); + + // simx64 + Word func2 = (code >> shift_func7_) & func2_mask_; + Word func3 = (code >> shift_func3_) & func3_mask_; + Word func6 = (code >> shift_func6_) & func6_mask_; + Word func7 = (code >> shift_func7_) & func7_mask_; + + int rd = (code >> shift_rd_) & reg_mask_; + int rs1 = (code >> shift_rs1_) & reg_mask_; + int rs2 = (code >> shift_rs2_) & reg_mask_; + int rs3 = (code >> shift_rs3_) & reg_mask_; + + auto op_it = sc_instTable.find(op); + if (op_it == sc_instTable.end()) { + std::cout << std::hex << "invalid opcode: 0x" << op << ", instruction=0x" << code << ", PC=" << PC << std::endl; + std::abort(); + } + + auto iType = op_it->second.iType; + if (op == Opcode::FL || op == Opcode::FS) { + // simx64 + if (func3 != 0x2 && func3 != 0x3) { + iType = InstType::V_TYPE; + } + } + + switch (iType) { + case InstType::N_TYPE: + break; + + case InstType::R_TYPE: + if (op == Opcode::FCI) { + switch (func7) { + case 0x68: // FCVT.S.W, FCVT.S.WU, FCVT.S.L, FCVT.S.LU + case 0x69: // FCVT.D.W, FCVT.D.WU, FCVT.D.L, FCVT.D.LU + case 0x78: // FMV.W.X + case 0x79: // FMV.D.X + instr->setSrcReg(rs1); + break; + default: + instr->setSrcFReg(rs1); + } + instr->setSrcFReg(rs2); + switch (func7) { + case 0x50: // FLE.S, FLT.S, FEQ.S + case 0x51: // FLE.D, FLT.D, FEQ.D 
+ case 0x60: // FCVT.WU.S, FCVT.W.S, FCVT.L.S, FCVT.LU.S + case 0x61: // FCVT.W.D, FCVT.WU.D, FCVT.L.D, FCVT.LU.D + case 0x70: // FLASS.S, FMV.X.W + case 0x71: // FCLASS.D, FMV.X.D + instr->setDestReg(rd); + break; + default: + instr->setDestFReg(rd); + } + } else { + instr->setDestReg(rd); + instr->setSrcReg(rs1); + instr->setSrcReg(rs2); + } + instr->setFunc3(func3); + instr->setFunc7(func7); + break; + + case InstType::I_TYPE: { + instr->setSrcReg(rs1); + if (op == Opcode::FL) { + instr->setDestFReg(rd); + } else { + instr->setDestReg(rd); + } + instr->setFunc3(func3); + instr->setFunc7(func7); + if ((func3 == 5) && (op != L_INST) && (op != Opcode::FL)) { + instr->setImm(signExt(rs2, 6, 0x3F)); + } else { + instr->setImm(signExt(code >> shift_rs2_, 12, i_imm_mask_)); + } + } break; + + case InstType::S_TYPE: { + instr->setSrcReg(rs1); + if (op == Opcode::FS) { + instr->setSrcFReg(rs2); + } else { + instr->setSrcReg(rs2); + } + instr->setFunc3(func3); + DoubleWord imeed = (func7 << reg_s_) | rd; + instr->setImm(signExt(imeed, 12, s_imm_mask_)); + } break; + + case InstType::B_TYPE: { + instr->setSrcReg(rs1); + instr->setSrcReg(rs2); + instr->setFunc3(func3); + Word bit_11 = rd & 0x1; + Word bits_4_1 = rd >> 1; + Word bit_10_5 = func7 & 0x3f; + Word bit_12 = func7 >> 6; + DoubleWord imeed = (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12); + instr->setImm(signExt(imeed, 13, b_imm_mask_)); + } break; + + case InstType::U_TYPE: + instr->setDestReg(rd); + instr->setImm(signExt(code >> shift_func3_, 20, u_imm_mask_)); + break; + + case InstType::J_TYPE: { + instr->setDestReg(rd); + Word unordered = code >> shift_func3_; + Word bits_19_12 = unordered & 0xff; + Word bit_11 = (unordered >> 8) & 0x1; + Word bits_10_1 = (unordered >> 9) & 0x3ff; + Word bit_20 = (unordered >> 19) & 0x1; + DoubleWord imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20); + if (bit_20) { + imeed |= ~j_imm_mask_; + } + instr->setImm(imeed); + } 
break; + + case InstType::V_TYPE: + switch (op) { + case Opcode::VSET: { + instr->setDestVReg(rd); + instr->setSrcVReg(rs1); + instr->setFunc3(func3); + if (func3 == 7) { + instr->setImm(!(code >> shift_vset_)); + if (instr->getImm()) { + Word immed = (code >> shift_rs2_) & v_imm_mask_; + instr->setImm(immed); + instr->setVlmul(immed & 0x3); + instr->setVediv((immed >> 4) & 0x3); + instr->setVsew((immed >> 2) & 0x3); + } else { + instr->setSrcVReg(rs2); + } + } else { + instr->setSrcVReg(rs2); + instr->setVmask((code >> shift_func7_) & 0x1); + instr->setFunc6(func6); + } + } break; + + case Opcode::VL: + instr->setDestVReg(rd); + instr->setSrcVReg(rs1); + instr->setVlsWidth(func3); + instr->setSrcVReg(rs2); + instr->setVmask(code >> shift_func7_); + instr->setVmop((code >> shift_vmop_) & func3_mask_); + instr->setVnf((code >> shift_vnf_) & func3_mask_); + break; + + case Opcode::VS: + instr->setVs3(rd); + instr->setSrcVReg(rs1); + instr->setVlsWidth(func3); + instr->setSrcVReg(rs2); + instr->setVmask(code >> shift_func7_); + instr->setVmop((code >> shift_vmop_) & func3_mask_); + instr->setVnf((code >> shift_vnf_) & func3_mask_); + break; + + default: + std::abort(); + } + break; + case R4_TYPE: + instr->setDestFReg(rd); + instr->setSrcFReg(rs1); + instr->setSrcFReg(rs2); + instr->setSrcFReg(rs3); + instr->setFunc3(func3); + // simx64 + instr->setFunc2(func2); + break; + default: + std::abort(); + } + + D(2, "Instr 0x" << std::hex << code << ": " << *instr << std::flush); + + return instr; +} diff --git a/sim/simx/decode.h b/sim/simx/decode.h new file mode 100644 index 00000000..f8f3909c --- /dev/null +++ b/sim/simx/decode.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include + +namespace vortex { + +class ArchDef; +class Instr; +class Pipeline; + +class Decoder { +public: + Decoder(const ArchDef &); + + std::shared_ptr decode(Word code, Word PC); + +private: + + Word inst_s_; + Word opcode_s_; + Word reg_s_; + Word func2_s_; + Word func3_s_; + Word shift_opcode_; 
+ Word shift_rd_; + Word shift_rs1_; + Word shift_rs2_; + Word shift_rs3_; + Word shift_func2_; + Word shift_func3_; + Word shift_func7_; + Word shift_j_u_immed_; + Word shift_s_b_immed_; + Word shift_i_immed_; + + Word reg_mask_; + Word func2_mask_; + Word func3_mask_; + Word func6_mask_; + Word func7_mask_; + Word opcode_mask_; + Word i_imm_mask_; + Word s_imm_mask_; + Word b_imm_mask_; + Word u_imm_mask_; + Word j_imm_mask_; + Word v_imm_mask_; + + //Vector + Word shift_vset_; + Word shift_vset_immed_; + Word shift_vmask_; + Word shift_vmop_; + Word shift_vnf_; + Word shift_func6_; + Word vmask_s_; + Word mop_s_; +}; + +} \ No newline at end of file diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp new file mode 100644 index 00000000..5852b3a2 --- /dev/null +++ b/sim/simx/execute.cpp @@ -0,0 +1,1902 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "warp.h" +#include "instr.h" +#include "core.h" + +using namespace vortex; +
+// Reports whether the active threads disagree on a predicate register.
+//
+// thread_mask  indexed by thread; a true entry marks the thread as active.
+// reg_file     register file, indexed as reg_file[thread][register].
+// reg          index of the register whose value, converted to bool, is the
+//              predicate being compared.
+//
+// Returns true as soon as an active thread's predicate differs from that of
+// the first active thread, and false when every active thread agrees. At
+// least one thread is expected to be active (asserted); if the assertion is
+// compiled out and none is active, the result is false.
+//
+// NOTE(review): the element type of reg_file is not legible in this copy of
+// the patch. DoubleWord is assumed because Warp::execute copies integer
+// register values into DoubleWord variables - confirm against the declaration
+// of iRegFile_ before merging.
+static bool HasDivergentThreads(const ThreadMask &thread_mask,
+                                const std::vector<std::vector<DoubleWord>> &reg_file,
+                                unsigned reg) {
+  const size_t num_threads = reg_file.size();
+
+  // Find the first active thread; its predicate is the reference value.
+  size_t first = 0;
+  while (first < num_threads && !thread_mask[first]) {
+    ++first;
+  }
+  assert(first != num_threads);
+  if (first == num_threads) {
+    // Only reachable when asserts are disabled: no active thread, so there is
+    // nothing to compare and nothing can diverge.
+    return false;
+  }
+  const bool reference = bool(reg_file[first][reg]);
+
+  // Compare the remaining active threads against the reference. The scan
+  // starts one past `first` because a thread cannot diverge from itself.
+  for (size_t t = first + 1; t < num_threads; ++t) {
+    if (thread_mask[t] && bool(reg_file[t][reg]) != reference) {
+      return true;
+    }
+  }
+  return false;
+}
+
+inline uint32_t get_fpu_rm(uint32_t func3, Core* core, uint32_t tid, uint32_t wid) { + return (func3 == 0x7) ? 
core->get_csr(CSR_FRM, tid, wid) : func3; +} +
+// Accumulates the floating-point exception flags raised by one operation.
+//
+// When `fflags` is non-zero it is OR-ed into the current value of CSR_FCSR and
+// then into the current value of CSR_FFLAGS, each read from and written back
+// to `core` with `tid` and `wid` forwarded unchanged. A zero `fflags` leaves
+// both CSRs untouched.
+inline void update_fcrs(uint32_t fflags, Core* core, uint32_t tid, uint32_t wid) {
+  if (fflags == 0) {
+    return;
+  }
+  // Read-modify-write each CSR in turn, CSR_FCSR first.
+  auto fcsr_value = core->get_csr(CSR_FCSR, tid, wid);
+  core->set_csr(CSR_FCSR, fcsr_value | fflags, tid, wid);
+  auto fflags_value = core->get_csr(CSR_FFLAGS, tid, wid);
+  core->set_csr(CSR_FFLAGS, fflags_value | fflags, tid, wid);
+}
+
+void Warp::execute(const Instr &instr, Pipeline *pipeline) { + assert(tmask_.any()); + + // simx64 + DoubleWord nextPC = PC_ + 4; + bool runOnce = false; + + Word func3 = instr.getFunc3(); + Word func6 = instr.getFunc6(); + Word func7 = instr.getFunc7(); + Word func2 = instr.getFunc2(); + + auto opcode = instr.getOpcode(); + int rdest = instr.getRDest(); + int rsrc0 = instr.getRSrc(0); + int rsrc1 = instr.getRSrc(1); + DoubleWord immsrc= instr.getImm(); + DoubleWord vmask = instr.getVmask(); + + int num_threads = core_->arch().num_threads(); + for (int t = 0; t < num_threads; t++) { + if (!tmask_.test(t) || runOnce) + continue; + + auto &iregs = iRegFile_.at(t); + auto &fregs = fRegFile_.at(t); + + DoubleWord rsdata[3]; + DoubleWord rddata; + + int num_rsrcs = instr.getNRSrc(); + if (num_rsrcs) { + DPH(2, "[" << std::dec << t << "] Src Regs: "); + for (int i = 0; i < num_rsrcs; ++i) { + int rst = instr.getRSType(i); + int rs = instr.getRSrc(i); + if (i) DPN(2, ", "); + switch (rst) { + case 1: + rsdata[i] = iregs[rs]; + DPN(2, "r" << std::dec << rs << "=0x" << std::hex << rsdata[i]); + break; + case 2: + rsdata[i] = fregs[rs]; + DPN(2, "fr" << std::dec << rs << "=0x" << std::hex << rsdata[i]); + break; + default: break; + } + } + DPN(2, std::endl); + } + + bool rd_write = false; + + switch (opcode) { + case NOP: + break; + case LUI_INST: + rddata = (immsrc << 12) & 0xfffffffffffff000; + rd_write = true; + break; + case AUIPC_INST: + // simx64 + rddata = ((immsrc << 12) & 0xfffffffffffff000) + PC_; + rd_write = true; + break; + case R_INST: { + if (func7 & 0x1) { + switch (func3) { + case 0: + // RV32M: MUL + rddata = ((DoubleWordI)rsdata[0]) * 
((DoubleWordI)rsdata[1]); + break; + case 1: { + // RV32M: MULH + __int128_t first = signExt128((__int128_t)rsdata[0], 64, 0xFFFFFFFFFFFFFFFF); + __int128_t second = signExt128((__int128_t)rsdata[1], 64, 0xFFFFFFFFFFFFFFFF); + __uint128_t result = first * second; + rddata = (result >> 64) & 0xFFFFFFFFFFFFFFFF; + } break; + case 2: { + // RV32M: MULHSU + __int128_t first = signExt128((__int128_t)rsdata[0], 64, 0xFFFFFFFFFFFFFFFF); + __int128_t second = (__int128_t)rsdata[1]; + __uint128_t result = first * second; + rddata = (result >> 64) & 0xFFFFFFFFFFFFFFFF; + } break; + case 3: { + // RV32M: MULHU + __uint128_t first = (__uint128_t)rsdata[0]; + __uint128_t second = (__uint128_t)rsdata[1]; + rddata = ((first * second) >> 64) & 0xFFFFFFFFFFFFFFFF; + } break; + case 4: { + // RV32M: DIV + DoubleWordI dividen = rsdata[0]; + DoubleWordI divisor = rsdata[1]; + if (divisor == 0) { + rddata = -1; + } else if (dividen == DoubleWordI(0x8000000000000000) && divisor == DoubleWordI(0xFFFFFFFFFFFFFFFF)) { + rddata = dividen; + } else { + rddata = dividen / divisor; + } + } break; + case 5: { + // RV32M: DIVU + DoubleWord dividen = rsdata[0]; + DoubleWord divisor = rsdata[1]; + if (divisor == 0) { + rddata = -1; + } else { + rddata = dividen / divisor; + } + } break; + case 6: { + // RV32M: REM + DoubleWordI dividen = rsdata[0]; + DoubleWordI divisor = rsdata[1]; + if (divisor == 0) { + rddata = dividen; + } else if (dividen == DoubleWordI(0x8000000000000000) && divisor == DoubleWordI(0xFFFFFFFFFFFFFFFF)) { + rddata = 0; + } else { + rddata = dividen % divisor; + } + } break; + case 7: { + // RV32M: REMU + DoubleWord dividen = rsdata[0]; + DoubleWord divisor = rsdata[1]; + if (divisor == 0) { + rddata = dividen; + } else { + rddata = dividen % divisor; + } + } break; + default: + std::cout << "unsupported MUL/DIV instr\n"; + std::abort(); + } + } else { + switch (func3) { + case 0: + if (func7) { + // RV32I: SUB + rddata = rsdata[0] - rsdata[1]; + } else { + // RV32I: ADD + 
rddata = rsdata[0] + rsdata[1]; + } + break; + case 1: + // RV32I: SLL + rddata = rsdata[0] << rsdata[1]; + break; + case 2: + // RV32I: SLT (signed) + rddata = (DoubleWordI(rsdata[0]) < DoubleWordI(rsdata[1])); + break; + case 3: + // RV32I: SLTU (unsigned) + rddata = (DoubleWord(rsdata[0]) < DoubleWord(rsdata[1])); + break; + case 4: + // RV32I: XOR + rddata = rsdata[0] ^ rsdata[1]; + break; + case 5: + if (func7) { + // RV32I: SRA + rddata = DoubleWordI(rsdata[0]) >> DoubleWordI(rsdata[1]); + } else { + // RV32I: SRL + rddata = DoubleWord(rsdata[0]) >> DoubleWord(rsdata[1]); + } + break; + case 6: + // RV32I: OR + rddata = rsdata[0] | rsdata[1]; + break; + case 7: + // RV32I: AND + rddata = rsdata[0] & rsdata[1]; + break; + default: + std::abort(); + } + } + rd_write = true; + } break; + case I_INST: + switch (func3) { + case 0: + // RV32I: ADDI + rddata = rsdata[0] + immsrc; + break; + case 1: + // RV64I: SLLI + rddata = rsdata[0] << immsrc; + break; + case 2: + // RV32I: SLTI + rddata = (DoubleWordI(rsdata[0]) < DoubleWordI(immsrc)); + break; + case 3: { + // RV32I: SLTIU + rddata = (DoubleWord(rsdata[0]) < DoubleWord(immsrc)); + } break; + case 4: + // RV32I: XORI + rddata = rsdata[0] ^ immsrc; + break; + case 5: + if (func7) { + // RV64I: SRAI + // rs1 shifted by lower 6 bits of immsrc + DoubleWord result = DoubleWordI(rsdata[0]) >> immsrc; + rddata = result; + } else { + // RV64I: SRLI + // rs1 shifted by lower 6 bits of immsrc + DoubleWord result = DoubleWord(rsdata[0]) >> immsrc; + rddata = result; + } + break; + case 6: + // RV32I: ORI + rddata = rsdata[0] | immsrc; + break; + case 7: + // RV32I: ANDI + rddata = rsdata[0] & immsrc; + break; + default: + std::abort(); + } + rd_write = true; + break; + case B_INST: + switch (func3) { + case 0: + // RV32I: BEQ + if (rsdata[0] == rsdata[1]) { + nextPC = PC_ + immsrc; + } + break; + case 1: + // RV32I: BNE + if (rsdata[0] != rsdata[1]) { + nextPC = PC_ + immsrc; + } + break; + case 4: + // RV32I: BLT + if 
(DoubleWordI(rsdata[0]) < DoubleWordI(rsdata[1])) { + nextPC = PC_ + immsrc; + } + break; + case 5: + // RV32I: BGE + if (DoubleWordI(rsdata[0]) >= DoubleWordI(rsdata[1])) { + nextPC = PC_ + immsrc; + } + break; + case 6: + // RV32I: BLTU + if (DoubleWord(rsdata[0]) < DoubleWord(rsdata[1])) { + nextPC = PC_ + immsrc; + } + break; + case 7: + // RV32I: BGEU + if (DoubleWord(rsdata[0]) >= DoubleWord(rsdata[1])) { + nextPC = PC_ + immsrc; + } + break; + } + pipeline->stall_warp = true; + runOnce = true; + break; + // RV32I: JAL + case JAL_INST: + rddata = nextPC; + nextPC = PC_ + immsrc; + pipeline->stall_warp = true; + runOnce = true; + rd_write = true; + break; + // RV32I: JALR + case JALR_INST: + rddata = nextPC; + nextPC = DoubleWord(rsdata[0]) + DoubleWord(immsrc); + pipeline->stall_warp = true; + runOnce = true; + rd_write = true; + break; + case L_INST: { + DoubleWord memAddr = ((rsdata[0] + immsrc) & 0xFFFFFFF8); // DoubleWord aligned + DoubleWord shift_by = ((rsdata[0] + immsrc) & 0x00000007) * 8; + DoubleWord data_read = core_->dcache_read(memAddr, 8); + D(3, "LOAD MEM: ADDRESS=0x" << std::hex << memAddr << ", DATA=0x" << data_read); + switch (func3) { + case 0: + // RV32I: LBI + rddata = signExt((data_read >> shift_by) & 0xFF, 8, 0xFF); + break; + case 1: + // RV32I: LHI + rddata = signExt((data_read >> shift_by) & 0xFFFF, 16, 0xFFFF); + break; + case 2: + // RV32I: LW + rddata = signExt((data_read >> shift_by) & 0xFFFFFFFF, 32, 0xFFFFFFFF); + break; + case 3: + // RV64I: LD + rddata = DoubleWord(data_read); + break; + case 4: + // RV32I: LBU + rddata = DoubleWord((data_read >> shift_by) & 0xFF); + break; + case 5: + // RV32I: LHU + rddata = DoubleWord((data_read >> shift_by) & 0xFFFF); + break; + case 6: + // RV64I: LWU + rddata = DoubleWord((data_read >> shift_by) & 0xFFFFFFFF); + break; + default: + std::abort(); + } + rd_write = true; + } break; + case S_INST: { + DoubleWord memAddr = rsdata[0] + immsrc; + D(3, "STORE MEM: ADDRESS=0x" << std::hex << 
memAddr); + switch (func3) { + case 0: + // RV32I: SB + core_->dcache_write(memAddr, rsdata[1] & 0x000000FF, 1); + break; + case 1: + // RV32I: SH + core_->dcache_write(memAddr, rsdata[1] & 0x0000FFFF, 2); + break; + case 2: + // RV32I: SW + core_->dcache_write(memAddr, rsdata[1] & 0xFFFFFFFF, 4); + break; + case 3: + // RV64I: SD + core_ ->dcache_write(memAddr, rsdata[1], 8); + break; + default: + std::abort(); + } + } break; + // simx64 + case R_INST_64: { + if (func7 & 0x1){ + switch (func3) { + case 0: + // RV64M: MULW + rddata = signExt((WordI)rsdata[0] * (WordI)rsdata[1], 32, 0xFFFFFFFF); + break; + case 4: { + // RV64M: DIVW + int32_t dividen = (WordI) rsdata[0]; + int32_t divisor = (WordI) rsdata[1]; + if (divisor == 0){ + rddata = -1; + } else if (dividen == WordI(0x80000000) && divisor == WordI(0xFFFFFFFF)) { + rddata = signExt(dividen, 32, 0xFFFFFFFF); + } else { + rddata = signExt(dividen / divisor, 32, 0xFFFFFFFF); + } + } break; + case 5: { + // RV64M: DIVUW + uint32_t dividen = (Word) rsdata[0]; + uint32_t divisor = (Word) rsdata[1]; + if (divisor == 0){ + rddata = -1; + } else { + rddata = signExt(dividen / divisor, 32, 0xFFFFFFFF); + } + } break; + case 6: { + // RV64M: REMW + int32_t dividen = (WordI) rsdata[0]; + int32_t divisor = (WordI) rsdata[1]; + if (divisor == 0){ + rddata = signExt(dividen, 32, 0xFFFFFFFF); + } else if (dividen == WordI(0x80000000) && divisor == WordI(0xFFFFFFFF)) { + rddata = 0; + } else { + rddata = signExt(dividen % divisor, 32, 0xFFFFFFFF); + } + } break; + case 7: { + // RV64M: REMUW + uint32_t dividen = (Word) rsdata[0]; + uint32_t divisor = (Word) rsdata[1]; + if (divisor == 0){ + rddata = signExt(dividen, 32, 0xFFFFFFFF); + } else { + rddata = signExt(dividen % divisor, 32, 0xFFFFFFFF); + } + } break; + default: + std::abort(); + } + } else { + switch (func3) { + case 0: + if (func7){ + // RV64I: SUBW + rddata = signExt((Word)rsdata[0] - (Word)rsdata[1], 32, 0xFFFFFFFF); + } + else{ + // RV64I: ADDW + rddata = 
signExt((Word)rsdata[0] + (Word)rsdata[1], 32, 0xFFFFFFFF); + } + break; + case 1: + // RV64I: SLLW + rddata = signExt((Word)rsdata[0] << (Word)rsdata[1], 32, 0xFFFFFFFF); + break; + case 5: + if (func7) { + // RV64I: SRAW + rddata = signExt((WordI)rsdata[0] >> (WordI)rsdata[1], 32, 0xFFFFFFFF); + } else { + // RV64I: SRLW + rddata = signExt((Word)rsdata[0] >> (Word)rsdata[1], 32, 0xFFFFFFFF); + } + break; + default: + std::abort(); + } + } + rd_write = true; + } break; + + // simx64 + case I_INST_64: { + switch (func3) { + case 0: + // RV64I: ADDIW + rddata = signExt((Word)rsdata[0] + (Word)immsrc, 32, 0xFFFFFFFF); + break; + case 1: + // RV64I: SLLIW + rddata = signExt((Word)rsdata[0] << (Word)immsrc, 32, 0xFFFFFFFF); + break; + case 5: + if (func7) { + // RV64I: SRAIW + DoubleWord result = signExt((WordI)rsdata[0] >> (WordI)immsrc, 32, 0xFFFFFFFF); + rddata = result; + } else { + // RV64I: SRLIW + DoubleWord result = signExt((Word)rsdata[0] >> (Word)immsrc, 32, 0xFFFFFFFF); + rddata = result; + } + break; + default: + std::abort(); + } + rd_write = true; + } break; + case SYS_INST: { + DoubleWord csr_addr = immsrc & 0x00000FFF; + DoubleWord csr_value = core_->get_csr(csr_addr, t, id_); + switch (func3) { + case 0: + if (csr_addr < 2) { + // ECALL/EBREAK + core_->trigger_ebreak(); + } + break; + case 1: + // RV32I: CSRRW + rddata = csr_value; + core_->set_csr(csr_addr, rsdata[0], t, id_); + rd_write = true; + break; + case 2: + // RV32I: CSRRS + rddata = csr_value; + core_->set_csr(csr_addr, csr_value | rsdata[0], t, id_); + rd_write = true; + break; + case 3: + // RV32I: CSRRC + rddata = csr_value; + core_->set_csr(csr_addr, csr_value & ~rsdata[0], t, id_); + rd_write = true; + break; + case 5: + // RV32I: CSRRWI + rddata = csr_value; + core_->set_csr(csr_addr, rsrc0, t, id_); + rd_write = true; + break; + case 6: + // RV32I: CSRRSI + rddata = csr_value; + core_->set_csr(csr_addr, csr_value | rsrc0, t, id_); + rd_write = true; + break; + case 7: + // RV32I: 
CSRRCI + rddata = csr_value; + core_->set_csr(csr_addr, csr_value & ~rsrc0, t, id_); + rd_write = true; + break; + default: + break; + } + } break; + // RV32I: FENCE + case FENCE: + pipeline->stall_warp = true; + runOnce = true; + break; + case (FL | VL): + if (func3 == 0x2) { + // RV32F: FLW + DoubleWord memAddr = rsdata[0] + immsrc; + DoubleWord data_read = core_->dcache_read(memAddr, 4); + D(3, "LOAD MEM: ADDRESS=0x" << std::hex << memAddr << ", DATA=0x" << data_read); + rddata = data_read | 0xFFFFFFFF00000000; + } else if (func3 == 0x3) { + // RV32D: FLD + DoubleWord memAddr = ((rsdata[0] + immsrc) & 0xFFFFFFF8); + DoubleWord data_read = core_->dcache_read(memAddr, 8); + D(3, "LOAD MEM: ADDRESS=0x" << std::hex << memAddr << ", DATA=0x" << data_read); + rddata = data_read; + } else { + D(3, "Executing vector load"); + D(3, "lmul: " << vtype_.vlmul << " VLEN:" << (core_->arch().vsize() * 8) << "sew: " << vtype_.vsew); + D(3, "src: " << rsrc0 << " " << rsdata[0]); + D(3, "dest" << rdest); + D(3, "width" << instr.getVlsWidth()); + + auto &vd = vRegFile_[rdest]; + + switch (instr.getVlsWidth()) { + case 6: { + //load DoubleWord and unit strided (not checking for unit stride) + for (int i = 0; i < vl_; i++) { + DoubleWord memAddr = ((rsdata[0]) & 0xFFFFFFFC) + (i * vtype_.vsew / 8); + D(3, "STORE MEM: ADDRESS=0x" << std::hex << memAddr); + DoubleWord data_read = core_->dcache_read(memAddr, 4); + D(3, "Mem addr: " << std::hex << memAddr << " Data read " << data_read); + int *result_ptr = (int *)(vd.data() + i); + *result_ptr = data_read; + } + } break; + default: + std::abort(); + } + break; + } + rd_write = true; + break; + case (FS | VS): + if (func3 == 0x2) { + // RV32F: FSW + DoubleWord memAddr = rsdata[0] + immsrc; + core_->dcache_write(memAddr, rsdata[1], 4); + D(3, "STORE MEM: ADDRESS=0x" << std::hex << memAddr); + } else if (func3 == 0x3){ + // RV32D: FSD + DoubleWord memAddr = rsdata[0] + immsrc; + core_->dcache_write(memAddr, rsdata[1], 8); + D(3, "STORE 
MEM: ADDRESS=0x" << std::hex << memAddr); + } else { + for (int i = 0; i < vl_; i++) { + DoubleWord memAddr = rsdata[0] + (i * vtype_.vsew / 8); + D(3, "STORE MEM: ADDRESS=0x" << std::hex << memAddr); + switch (instr.getVlsWidth()) { + case 6: { + //store DoubleWord and unit strided (not checking for unit stride) + uint32_t value = *(uint32_t *)(vRegFile_[instr.getVs3()].data() + i); + core_->dcache_write(memAddr, value, 4); + D(3, "store: " << memAddr << " value:" << value); + } break; + default: + std::abort(); + } + } + } + break; + case FCI: { + uint32_t frm = get_fpu_rm(func3, core_, t, id_); + uint32_t fflags = 0; + switch (func7) { + case 0x00: // RV32F: FADD.S + rddata = rv_fadd(rsdata[0], rsdata[1], frm, &fflags); + break; + case 0x01: // RV32D: FADD.D + rddata = rv_fadd_d(rsdata[0], rsdata[1], frm, &fflags); + break; + case 0x04: // RV32F: FSUB.S + rddata = rv_fsub(rsdata[0], rsdata[1], frm, &fflags); + break; + case 0x05: // RV32D: FSUB.D + rddata = rv_fsub_d(rsdata[0], rsdata[1], frm, &fflags); + break; + case 0x08: // RV32F: FMUL.S + rddata = rv_fmul(rsdata[0], rsdata[1], frm, &fflags); + break; + case 0x09: // RV32D: FMUL.D + rddata = rv_fmul_d(rsdata[0], rsdata[1], frm, &fflags); + break; + case 0x0c: // RV32F: FDIV.S + rddata = rv_fdiv(rsdata[0], rsdata[1], frm, &fflags); + break; + case 0x0d: // RV32D: FDIV.D + rddata = rv_fdiv_d(rsdata[0], rsdata[1], frm, &fflags); + break; + case 0x2c: // RV32F: FSQRT.S + rddata = rv_fsqrt(rsdata[0], frm, &fflags); + break; + case 0x2d: // RV32D: FSQRT.D + rddata = rv_fsqrt_d(rsdata[0], frm, &fflags); + break; + case 0x10: + switch (func3) { + case 0: // RV32F: FSGNJ.S + rddata = rv_fsgnj((Word)rsdata[0], (Word)rsdata[1]) | 0xFFFFFFFF00000000; + break; + case 1: // RV32F: FSGNJN.S + rddata = rv_fsgnjn((Word)rsdata[0], (Word)rsdata[1]) | 0xFFFFFFFF00000000; + break; + case 2: // RV32F: FSGNJX.S + rddata = rv_fsgnjx((Word)rsdata[0], (Word)rsdata[1]) | 0xFFFFFFFF00000000; + break; + } + break; + case 0x11: + switch 
(func3) { + case 0: // RV32D: FSGNJ.D + rddata = rv_fsgnj_d(rsdata[0], rsdata[1]); + break; + case 1: // RV32D: FSGNJN.D + rddata = rv_fsgnjn_d(rsdata[0], rsdata[1]); + break; + case 2: // RV32D: FSGNJX.D + rddata = rv_fsgnjx_d(rsdata[0], rsdata[1]); + break; + } + break; + case 0x14: + if (func3) { + // RV32F: FMAX.S + rddata = rv_fmax(rsdata[0], rsdata[1], &fflags); + } else { + // RV32F: FMIN.S + rddata = rv_fmin(rsdata[0], rsdata[1], &fflags); + } + break; + case 0x15: + if (func3) { + // RV32D: FMAX.D + rddata = rv_fmax_d(rsdata[0], rsdata[1], &fflags); + } else { + // RV32D: FMIN.D + rddata = rv_fmin_d(rsdata[0], rsdata[1], &fflags); + } + break; + case 0x20: rddata = rv_dtof(rsdata[0]); + break; + case 0x21: rddata = rv_ftod(rsdata[0]); + break; + case 0x60: + switch(rsrc1) { + case 0: + // RV32F: FCVT.W.S + rddata = signExt(rv_ftoi(rsdata[0], frm, &fflags), 32, 0xFFFFFFFF); + break; + case 1: + // RV32F: FCVT.WU.S + rddata = signExt(rv_ftou(rsdata[0], frm, &fflags), 32, 0xFFFFFFFF); + break; + case 2: + // RV64F: FCVT.L.S + rddata = rv_ftol(rsdata[0], frm, &fflags); + break; + case 3: + // RV64F: FCVT.LU.S + rddata = rv_ftolu(rsdata[0], frm, &fflags); + break; + } + break; + case 0x61: + switch(rsrc1) { + case 0: + // RV32D: FCVT.W.D + rddata = signExt(rv_ftoi_d(rsdata[0], frm, &fflags), 32, 0xFFFFFFFF); + break; + case 1: + // RV32D: FCVT.WU.D + rddata = signExt(rv_ftou_d(rsdata[0], frm, &fflags), 32, 0xFFFFFFFF); + break; + case 2: + // RV64D: FCVT.L.D + rddata = rv_ftol_d(rsdata[0], frm, &fflags); + break; + case 3: + // RV64D: FCVT.LU.D + rddata = rv_ftolu_d(rsdata[0], frm, &fflags); + break; + } + break; + case 0x70: + if (func3) { + // RV32F: FCLASS.S + rddata = rv_fclss(rsdata[0]); + } else { + // RV32F: FMV.X.W + rddata = signExt((Word)rsdata[0], 32, 0xFFFFFFFF); + } + break; + case 0x71: + if (func3) { + // RV32D: FCLASS.D + rddata = rv_fclss_d(rsdata[0]); + } else { + // RV64D: FMV.X.D + rddata = rsdata[0]; + } + break; + case 0x50: + 
switch(func3) { + case 0: + // RV32F: FLE.S + rddata = rv_fle(rsdata[0], rsdata[1], &fflags); + break; + case 1: + // RV32F: FLT.S + rddata = rv_flt(rsdata[0], rsdata[1], &fflags); + break; + case 2: + // RV32F: FEQ.S + rddata = rv_feq(rsdata[0], rsdata[1], &fflags); + break; + } break; + case 0x51: + switch(func3) { + case 0: + // RV32D: FLE.D + rddata = rv_fle_d(rsdata[0], rsdata[1], &fflags); + break; + case 1: + // RV32D: FLT.D + rddata = rv_flt_d(rsdata[0], rsdata[1], &fflags); + break; + case 2: + // RV32D: FEQ.D + rddata = rv_feq_d(rsdata[0], rsdata[1], &fflags); + break; + } break; + case 0x68: + switch(rsrc1) { + case 0: + // RV32F: FCVT.S.W + rddata = rv_itof(rsdata[0], frm, &fflags); + break; + case 1: + // RV32F: FCVT.S.WU + rddata = rv_utof(rsdata[0], frm, &fflags); + break; + case 2: + // RV64F: FCVT.S.L + rddata = rv_ltof(rsdata[0], frm, &fflags); + break; + case 3: + // RV64F: FCVT.S.LU + rddata = rv_lutof(rsdata[0], frm, &fflags); + break; + } + break; + case 0x69: + switch(rsrc1) { + case 0: + // RV32D: FCVT.D.W + rddata = rv_itof_d(rsdata[0], frm, &fflags); + break; + case 1: + // RV32F: FCVT.D.WU + rddata = rv_utof_d(rsdata[0], frm, &fflags); + break; + case 2: + // RV64D: FCVT.D.L + rddata = rv_ltof_d(rsdata[0], frm, &fflags); + break; + case 3: + // RV64D: FCVT.D.LU + rddata = rv_lutof_d(rsdata[0], frm, &fflags); + break; + } + break; + case 0x78: + // RV32F: FMV.W.X + rddata = rsdata[0]; + break; + case 0x79: + // RV64D: FMV.D.X + rddata = rsdata[0]; + break; + } + update_fcrs(fflags, core_, t, id_); + rd_write = true; + } break; + case FMADD: + case FMSUB: + case FMNMADD: + case FMNMSUB: { + int frm = get_fpu_rm(func3, core_, t, id_); + // simx64 + Word fflags = 0; + switch (opcode) { + case FMADD: + if (func2) + // RV32D: FMADD.D + rddata = rv_fmadd_d(rsdata[0], rsdata[1], rsdata[2], frm, &fflags); + else + // RV32F: FMADD.S + rddata = rv_fmadd(rsdata[0], rsdata[1], rsdata[2], frm, &fflags); + break; + case FMSUB: + if (func2) + // RV32D: 
FMSUB.D + rddata = rv_fmsub_d(rsdata[0],rsdata[1], rsdata[2], frm, &fflags); + else + // RV32F: FMSUB.S + rddata = rv_fmsub(rsdata[0], rsdata[1], rsdata[2], frm, &fflags); + break; + case FMNMADD: + if (func2) + // RV32D: FNMADD.D + rddata = rv_fnmadd_d(rsdata[0], rsdata[1], rsdata[2], frm, &fflags); + else + // RV32F: FNMADD.S + rddata = rv_fnmadd(rsdata[0], rsdata[1], rsdata[2], frm, &fflags); + break; + case FMNMSUB: + if (func2) + // RV32D: FNMSUB.D + rddata = rv_fnmsub_d(rsdata[0], rsdata[1], rsdata[2], frm, &fflags); + else + // RV32F: FNMSUB.S + rddata = rv_fnmsub(rsdata[0], rsdata[1], rsdata[2], frm, &fflags); + break; + default: + break; + } + update_fcrs(fflags, core_, t, id_); + rd_write = true; + } break; + case GPGPU: + switch (func3) { + case 0: { + // TMC + if (rsrc1) { + // predicate mode + ThreadMask pred; + for (int i = 0; i < num_threads; ++i) { + pred[i] = tmask_[i] ? (iRegFile_[i][rsrc0] != 0) : 0; + } + if (pred.any()) { + tmask_ &= pred; + } + } else { + tmask_.reset(); + for (int i = 0; i < num_threads; ++i) { + tmask_[i] = rsdata[0] & (1 << i); + } + } + D(3, "*** TMC " << tmask_); + active_ = tmask_.any(); + pipeline->stall_warp = true; + runOnce = true; + } break; + case 1: { + // WSPAWN + int active_warps = std::min(rsdata[0], core_->arch().num_warps()); + D(3, "*** Spawning " << (active_warps-1) << " warps at PC: " << std::hex << rsdata[1]); + for (int i = 1; i < active_warps; ++i) { + Warp &newWarp = core_->warp(i); + newWarp.setPC(rsdata[1]); + newWarp.setTmask(0, true); + } + pipeline->stall_warp = true; + runOnce = true; + } break; + case 2: { + // SPLIT + if (HasDivergentThreads(tmask_, iRegFile_, rsrc0)) { + ThreadMask tmask; + for (int i = 0; i < num_threads; ++i) { + tmask[i] = tmask_[i] && !iRegFile_[i][rsrc0]; + } + + DomStackEntry e(tmask, nextPC); + domStack_.push(tmask_); + domStack_.push(e); + for (size_t i = 0; i < e.tmask.size(); ++i) { + tmask_[i] = !e.tmask[i] && tmask_[i]; + } + active_ = tmask_.any(); + + DPH(3, "*** 
Split: New TM="); + for (int i = 0; i < num_threads; ++i) DPN(3, tmask_[num_threads-i-1]); + DPN(3, ", Pushed TM="); + for (int i = 0; i < num_threads; ++i) DPN(3, e.tmask[num_threads-i-1]); + DPN(3, ", PC=0x" << std::hex << e.PC << "\n"); + } else { + D(3, "*** Unanimous pred"); + DomStackEntry e(tmask_); + e.unanimous = true; + domStack_.push(e); + } + pipeline->stall_warp = true; + runOnce = true; + } break; + case 3: { + // JOIN + if (!domStack_.empty() && domStack_.top().unanimous) { + D(3, "*** Uninimous branch at join"); + tmask_ = domStack_.top().tmask; + active_ = tmask_.any(); + domStack_.pop(); + } else { + if (!domStack_.top().fallThrough) { + nextPC = domStack_.top().PC; + D(3, "*** Join: next PC: " << std::hex << nextPC << std::dec); + } + + tmask_ = domStack_.top().tmask; + active_ = tmask_.any(); + + DPH(3, "*** Join: New TM="); + for (int i = 0; i < num_threads; ++i) DPN(3, tmask_[num_threads-i-1]); + DPN(3, "\n"); + + domStack_.pop(); + } + pipeline->stall_warp = true; + runOnce = true; + } break; + case 4: { + // BAR + active_ = false; + core_->barrier(rsdata[0], rsdata[1], id_); + pipeline->stall_warp = true; + runOnce = true; + } break; + case 6: { + // PREFETCH + int addr = rsdata[0]; + printf("*** PREFETCHED %d ***\n", addr); + } break; + default: + std::abort(); + } + break; + case VSET: { + int VLEN = core_->arch().vsize() * 8; + int VLMAX = (instr.getVlmul() * VLEN) / instr.getVsew(); + switch (func3) { + case 0: // vector-vector + switch (func6) { + case 0: { + auto& vr1 = vRegFile_[rsrc0]; + auto& vr2 = vRegFile_[rsrc1]; + auto& vd = vRegFile_[rdest]; + auto& mask = vRegFile_[0]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t emask = *(uint8_t *)(mask.data() + i); + uint8_t value = emask & 0x1; + if (vmask || (!vmask && value)) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = first + second; + D(3, "Adding " << first << " + " << second << " = " << 
result); + *(uint8_t *)(vd.data() + i) = result; + } + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t emask = *(uint16_t *)(mask.data() + i); + uint16_t value = emask & 0x1; + if (vmask || (!vmask && value)) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = first + second; + D(3, "Adding " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t emask = *(uint32_t *)(mask.data() + i); + uint32_t value = emask & 0x1; + if (vmask || (!vmask && value)) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = first + second; + D(3, "Adding " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } + } break; + case 24: { + //vmseq + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first == second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first == second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first == second) ? 
1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 25: { + //vmsne + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first != second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first != second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first != second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 26: { + //vmsltu + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first < second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first < second) ? 
1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first < second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 27: { + //vmslt + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + int8_t first = *(int8_t *)(vr1.data() + i); + int8_t second = *(int8_t *)(vr2.data() + i); + int8_t result = (first < second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + int16_t first = *(int16_t *)(vr1.data() + i); + int16_t second = *(int16_t *)(vr2.data() + i); + int16_t result = (first < second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(int16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + int32_t first = *(int32_t *)(vr1.data() + i); + int32_t second = *(int32_t *)(vr2.data() + i); + int32_t result = (first < second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(int32_t *)(vd.data() + i) = result; + } + } + } break; + case 28: { + //vmsleu + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first <= second) ? 
1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first <= second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first <= second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 29: { + //vmsle + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + int8_t first = *(int8_t *)(vr1.data() + i); + int8_t second = *(int8_t *)(vr2.data() + i); + int8_t result = (first <= second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + int16_t first = *(int16_t *)(vr1.data() + i); + int16_t second = *(int16_t *)(vr2.data() + i); + int16_t result = (first <= second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(int16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + int32_t first = *(int32_t *)(vr1.data() + i); + int32_t second = *(int32_t *)(vr2.data() + i); + int32_t result = (first <= second) ? 
1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(int32_t *)(vd.data() + i) = result; + } + } + } break; + case 30: { + //vmsgtu + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first > second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first > second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first > second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + } + } break; + case 31: { + //vmsgt + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + int8_t first = *(int8_t *)(vr1.data() + i); + int8_t second = *(int8_t *)(vr2.data() + i); + int8_t result = (first > second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + int16_t first = *(int16_t *)(vr1.data() + i); + int16_t second = *(int16_t *)(vr2.data() + i); + int16_t result = (first > second) ? 
1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(int16_t *)(vd.data() + i) = result; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + int32_t first = *(int32_t *)(vr1.data() + i); + int32_t second = *(int32_t *)(vr2.data() + i); + int32_t result = (first > second) ? 1 : 0; + D(3, "Comparing " << first << " + " << second << " = " << result); + *(int32_t *)(vd.data() + i) = result; + } + } + } break; + } + break; + case 2: { + switch (func6) { + case 24: { + // vmandnot + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value & !second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value & !second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = (first_value & !second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = 
result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 25: { + // vmand + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value & second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value & second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = (first_value & second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 26: { + // vmor + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = 
(first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value | second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value | second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = (first_value | second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 27: { + //vmxor + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value ^ second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t 
*)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value ^ second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = (first_value ^ second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 28: { + //vmornot + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = (first_value | !second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = (first_value | !second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } 
else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = (first_value | !second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 29: { + //vmnand + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = !(first_value & second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = !(first_value & second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = !(first_value & second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = 
result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 30: { + //vmnor + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = !(first_value | second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = !(first_value | second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = !(first_value | second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 31: { + //vmxnor + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t 
first_value = (first & 0x1); + uint8_t second_value = (second & 0x1); + uint8_t result = !(first_value ^ second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t first_value = (first & 0x1); + uint16_t second_value = (second & 0x1); + uint16_t result = !(first_value ^ second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t first_value = (first & 0x1); + uint32_t second_value = (second & 0x1); + uint32_t result = !(first_value ^ second_value); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 37: { + //vmul + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first * second); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t 
result = (first * second); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first * second); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 45: { + // vmacc + auto &vr1 = vRegFile_[rsrc0]; + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t first = *(uint8_t *)(vr1.data() + i); + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (first * second); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) += result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t first = *(uint16_t *)(vr1.data() + i); + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (first * second); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) += result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t first = *(uint32_t *)(vr1.data() + i); + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (first * second); + D(3, "Comparing " << first << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) += result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + } + } break; + case 6: { + switch (func6) { + 
case 0: { + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (rsdata[0] + second); + D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (rsdata[0] + second); + D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; i++) { + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (rsdata[0] + second); + D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + case 37: { + // vmul.vx + auto &vr2 = vRegFile_[rsrc1]; + auto &vd = vRegFile_[rdest]; + if (vtype_.vsew == 8) { + for (int i = 0; i < vl_; i++) { + uint8_t second = *(uint8_t *)(vr2.data() + i); + uint8_t result = (rsdata[0] * second); + D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result); + *(uint8_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint8_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 16) { + for (int i = 0; i < vl_; i++) { + uint16_t second = *(uint16_t *)(vr2.data() + i); + uint16_t result = (rsdata[0] * second); + D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result); + *(uint16_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint16_t *)(vd.data() + i) = 0; + } + } else if (vtype_.vsew == 32) { + for (int i = 0; i < vl_; 
i++) { + uint32_t second = *(uint32_t *)(vr2.data() + i); + uint32_t result = (rsdata[0] * second); + D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result); + *(uint32_t *)(vd.data() + i) = result; + } + for (int i = vl_; i < VLMAX; i++) { + *(uint32_t *)(vd.data() + i) = 0; + } + } + } break; + } + } break; + case 7: { + vtype_.vill = 0; + vtype_.vediv = instr.getVediv(); + vtype_.vsew = instr.getVsew(); + vtype_.vlmul = instr.getVlmul(); + + D(3, "lmul:" << vtype_.vlmul << " sew:" << vtype_.vsew << " ediv: " << vtype_.vediv << "rsrc_" << rsdata[0] << "VLMAX" << VLMAX); + + int s0 = rsdata[0]; + if (s0 <= VLMAX) { + vl_ = s0; + } else if (s0 < (2 * VLMAX)) { + vl_ = (int)ceil((s0 * 1.0) / 2.0); + } else if (s0 >= (2 * VLMAX)) { + vl_ = VLMAX; + } + rddata = vl_; + } break; + default: + std::abort(); + } + } break; + default: + std::abort(); + } + + if (rd_write) { + int rdt = instr.getRDType(); + switch (rdt) { + case 1: + if (rdest) { + D(2, "[" << std::dec << t << "] Dest Regs: r" << rdest << "=0x" << std::hex << std::hex << rddata); + iregs[rdest] = rddata; + } + break; + case 2: + D(2, "[" << std::dec << t << "] Dest Regs: fr" << rdest << "=0x" << std::hex << std::hex << rddata); + fregs[rdest] = rddata; + break; + default: + break; + } + } + } + + // simx64 + PC_ += 4; + if (PC_ != nextPC) { + D(3, "*** Next PC: " << std::hex << nextPC << std::dec); + PC_ = nextPC; + } +} diff --git a/sim/simx/instr.h b/sim/simx/instr.h new file mode 100644 index 00000000..68c28ca6 --- /dev/null +++ b/sim/simx/instr.h @@ -0,0 +1,147 @@ +#pragma once + +#include "types.h" + +namespace vortex { + +class Warp; + +enum Opcode { + NOP = 0, + R_INST = 0x33, + L_INST = 0x3, + I_INST = 0x13, + S_INST = 0x23, + B_INST = 0x63, + LUI_INST = 0x37, + AUIPC_INST= 0x17, + JAL_INST = 0x6f, + JALR_INST = 0x67, + SYS_INST = 0x73, + FENCE = 0x0f, + // F Extension + FL = 0x7, + FS = 0x27, + FCI = 0x53, + FMADD = 0x43, + FMSUB = 0x47, + FMNMSUB = 0x4b, + FMNMADD = 0x4f, + // 
Vector Extension + VSET = 0x57, + VL = 0x7, + VS = 0x27, + // GPGPU Extension + GPGPU = 0x6b, + // simx64 + // RV64 Standard Extensions + R_INST_64 = 0x3b, + I_INST_64 = 0x1b, +}; + +enum InstType { + N_TYPE, + R_TYPE, + I_TYPE, + S_TYPE, + B_TYPE, + U_TYPE, + J_TYPE, + V_TYPE, + R4_TYPE +}; + +class Instr { +public: + Instr() + : opcode_(Opcode::NOP) + , num_rsrcs_(0) + , has_imm_(false) + , rdest_(0) + , func3_(0) + , func7_(0) { + for (int i = 0; i < MAX_REG_SOURCES; ++i) { + rsrc_type_[i] = 0; + } + } + + /* Setters used to "craft" the instruction. */ + void setOpcode(Opcode opcode) { opcode_ = opcode; } + void setDestReg(int destReg) { rdest_type_ = 1; rdest_ = destReg; } + void setSrcReg(int srcReg) { rsrc_type_[num_rsrcs_] = 1; rsrc_[num_rsrcs_++] = srcReg; } + void setDestFReg(int destReg) { rdest_type_ = 2; rdest_ = destReg; } + void setSrcFReg(int srcReg) { rsrc_type_[num_rsrcs_] = 2; rsrc_[num_rsrcs_++] = srcReg; } + void setDestVReg(int destReg) { rdest_type_ = 3; rdest_ = destReg; } + void setSrcVReg(int srcReg) { rsrc_type_[num_rsrcs_] = 3; rsrc_[num_rsrcs_++] = srcReg; } + void setFunc2(Word func2) { func2_ = func2;} + void setFunc3(Word func3) { func3_ = func3; } + void setFunc7(Word func7) { func7_ = func7; } + void setImm(DoubleWord imm) { has_imm_ = true; imm_ = imm; } + void setVlsWidth(Word width) { vlsWidth_ = width; } + void setVmop(Word mop) { vMop_ = mop; } + void setVnf(Word nf) { vNf_ = nf; } + void setVmask(Word mask) { vmask_ = mask; } + void setVs3(Word vs) { vs3_ = vs; } + void setVlmul(Word lmul) { vlmul_ = 1 << lmul; } + void setVsew(Word sew) { vsew_ = 1 << (3+sew); } + void setVediv(Word ediv) { vediv_ = 1 << ediv; } + void setFunc6(Word func6) { func6_ = func6; } + + /* Getters used by encoders. 
*/ + Opcode getOpcode() const { return opcode_; } + Word getFunc2() const { return func2_; } + Word getFunc3() const { return func3_; } + Word getFunc6() const { return func6_; } + Word getFunc7() const { return func7_; } + int getNRSrc() const { return num_rsrcs_; } + int getRSrc(int i) const { return rsrc_[i]; } + int getRSType(int i) const { return rsrc_type_[i]; } + int getRDest() const { return rdest_; } + int getRDType() const { return rdest_type_; } + bool hasImm() const { return has_imm_; } + DoubleWord getImm() const { return imm_; } + Word getVlsWidth() const { return vlsWidth_; } + Word getVmop() const { return vMop_; } + Word getvNf() const { return vNf_; } + Word getVmask() const { return vmask_; } + Word getVs3() const { return vs3_; } + Word getVlmul() const { return vlmul_; } + Word getVsew() const { return vsew_; } + Word getVediv() const { return vediv_; } + +private: + + enum { + MAX_REG_SOURCES = 3 + }; + + Opcode opcode_; + int num_rsrcs_; + bool has_imm_; + int rdest_type_; + int isrc_mask_; + int fsrc_mask_; + int vsrc_mask_; + DoubleWord imm_; + int rsrc_type_[MAX_REG_SOURCES]; + int rsrc_[MAX_REG_SOURCES]; + int rdest_; + Word func2_; + Word func3_; + Word func7_; + + //Vector + Word vmask_; + Word vlsWidth_; + Word vMop_; + Word vNf_; + Word vs3_; + Word vlmul_; + Word vsew_; + Word vediv_; + Word func6_; + + friend std::ostream &operator<<(std::ostream &, const Instr&); +}; + +} \ No newline at end of file diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp new file mode 100644 index 00000000..9af8ff02 --- /dev/null +++ b/sim/simx/main.cpp @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "types.h" +#include "core.h" +#include "args.h" + +using namespace vortex; + +int main(int argc, char **argv) { + + std::string archString("rv32imf"); + int num_cores(NUM_CORES * NUM_CLUSTERS); + int num_warps(NUM_WARPS); + int num_threads(NUM_THREADS); + std::string imgFileName; + bool 
showHelp(false); + bool showStats(false); + bool riscv_test(false); + + /* Read the command line arguments. */ + CommandLineArgFlag fh("-h", "--help", "", showHelp); + CommandLineArgSetter fa("-a", "--arch", "", archString); + CommandLineArgSetter fi("-i", "--image", "", imgFileName); + CommandLineArgSetter fc("-c", "--cores", "", num_cores); + CommandLineArgSetter fw("-w", "--warps", "", num_warps); + CommandLineArgSetter ft("-t", "--threads", "", num_threads); + CommandLineArgFlag fr("-r", "--riscv", "", riscv_test); + CommandLineArgFlag fs("-s", "--stats", "", showStats); + + CommandLineArg::readArgs(argc - 1, argv + 1); + + if (showHelp || imgFileName.empty()) { + std::cout << "Vortex emulator command line arguments:\n" + " -i, --image Program RAM image\n" + " -c, --cores Number of cores\n" + " -w, --warps Number of warps\n" + " -t, --threads Number of threads\n" + " -a, --arch Architecture string\n" + " -r, --riscv riscv test\n" + " -s, --stats Print stats on exit.\n"; + return 0; + } + + ArchDef arch(archString, num_cores, num_warps, num_threads); + + Decoder decoder(arch); + MemoryUnit mu(0, arch.wsize(), true); + + RAM ram((1<<12), (1<<20)); + + std::string program_ext(fileExtension(imgFileName.c_str())); + if (program_ext == "bin") { + ram.loadBinImage(imgFileName.c_str(), STARTUP_ADDR); + } else if (program_ext == "hex") { + ram.loadHexImage(imgFileName.c_str()); + } else { + std::cout << "*** error: only *.bin or *.hex images supported." 
<< std::endl; + return -1; + } + + mu.attach(ram, 0, 0xFFFFFFFF); + + struct stat hello; + fstat(0, &hello); + + std::vector> cores(num_cores); + for (int i = 0; i < num_cores; ++i) { + cores[i] = std::make_shared(arch, decoder, mu, i); + } + + bool running; + int exitcode = 0; + do { + running = false; + for (auto& core : cores) { + core->step(); + if (core->running()) { + running = true; + } + if (core->check_ebreak()) { + exitcode = core->getIRegValue(3); + running = false; + break; + } + } + } while (running); + + if (riscv_test) { + if (1 == exitcode) { + std::cout << "Passed." << std::endl; + exitcode = 0; + } else { + std::cout << "Failed." << std::endl; + } + } else { + if (exitcode != 0) { + std::cout << "*** error: exitcode=" << exitcode << std::endl; + } + } + + return exitcode; +} diff --git a/sim/simx/pipeline.cpp b/sim/simx/pipeline.cpp new file mode 100644 index 00000000..c54977a0 --- /dev/null +++ b/sim/simx/pipeline.cpp @@ -0,0 +1,63 @@ +#include +#include "pipeline.h" + +using namespace vortex; + +namespace vortex { +std::ostream &operator<<(std::ostream &os, const Pipeline& pipeline) { + os << pipeline.name_ << ": valid=" << pipeline.valid << std::endl; + os << pipeline.name_ << ": stalled=" << pipeline.stalled << std::endl; + os << pipeline.name_ << ": stall_warp=" << pipeline.stall_warp << std::endl; + os << pipeline.name_ << ": wid=" << pipeline.wid << std::endl; + os << pipeline.name_ << ": PC=" << std::hex << pipeline.PC << std::endl; + os << pipeline.name_ << ": used_iregs=" << pipeline.used_iregs << std::endl; + os << pipeline.name_ << ": used_fregs=" << pipeline.used_fregs << std::endl; + os << pipeline.name_ << ": used_vregs=" << pipeline.used_vregs << std::endl; + return os; +} +} + +Pipeline::Pipeline(const char* name) +: name_(name) { + this->clear(); +} + +void Pipeline::clear() { + valid = false; + stalled = false; + stall_warp = false; + wid = 0; + PC = 0; + used_iregs.reset(); + used_fregs.reset(); + used_vregs.reset(); +} + +bool 
Pipeline::enter(Pipeline *drain) { + if (drain) { + if (drain->stalled) { + this->stalled = true; + return false; + } + drain->valid = false; + } + this->stalled = false; + if (!this->valid) + return false; + return true; +} + +void Pipeline::next(Pipeline *drain) { + if (drain) { + drain->valid = this->valid; + drain->stalled = this->stalled; + drain->stall_warp = this->stall_warp; + drain->wid = this->wid; + drain->PC = this->PC; + drain->rdest = this->rdest; + drain->rdest_type = this->rdest_type; + drain->used_iregs = this->used_iregs; + drain->used_fregs = this->used_fregs; + drain->used_vregs = this->used_vregs; + } +} \ No newline at end of file diff --git a/sim/simx/pipeline.h b/sim/simx/pipeline.h new file mode 100644 index 00000000..8d6034e6 --- /dev/null +++ b/sim/simx/pipeline.h @@ -0,0 +1,48 @@ + +#pragma once + +#include +#include +#include "types.h" +#include "debug.h" + +namespace vortex { + +class Instr; + +class Pipeline { +public: + Pipeline(const char* name); + + void clear(); + + bool enter(Pipeline* drain); + + void next(Pipeline* drain); + + //-- + bool valid; + + //-- + bool stalled; + bool stall_warp; + + //-- + int wid; + DoubleWord PC; + + //-- + int rdest_type; + int rdest; + RegMask used_iregs; + RegMask used_fregs; + RegMask used_vregs; + +private: + + const char* name_; + + friend std::ostream &operator<<(std::ostream &, const Pipeline&); +}; + +} \ No newline at end of file diff --git a/sim/simx/types.h b/sim/simx/types.h new file mode 100644 index 00000000..55d1dcea --- /dev/null +++ b/sim/simx/types.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + +namespace vortex { + +typedef uint8_t Byte; +typedef uint32_t Word; +typedef int32_t WordI; + +// simx64 +typedef uint64_t DoubleWord; +typedef int64_t DoubleWordI; + +// simx64 +typedef uint64_t Addr; +typedef uint64_t Size; + +typedef std::bitset<32> RegMask; + +typedef std::bitset<32> ThreadMask; + +typedef std::bitset<32> WarpMask; + +} \ No newline at end of file 
diff --git a/sim/simx/warp.cpp b/sim/simx/warp.cpp new file mode 100644 index 00000000..a1191dd7 --- /dev/null +++ b/sim/simx/warp.cpp @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include + +#include "instr.h" +#include "core.h" + +using namespace vortex; +/* Constructor: sizes iRegFile_ and fRegFile_ to num_threads rows of num_regs zero-filled entries and vRegFile_ to num_regs rows of vsize zero-filled entries, then calls clear(). */ +Warp::Warp(Core *core, Word id) + : id_(id) + , core_(core) { + // simx64 + iRegFile_.resize(core_->arch().num_threads(), std::vector(core_->arch().num_regs(), 0)); + fRegFile_.resize(core_->arch().num_threads(), std::vector(core_->arch().num_regs(), 0)); + vRegFile_.resize(core_->arch().num_regs(), std::vector(core_->arch().vsize(), 0)); + this->clear(); +} +/* Resets PC_ to STARTUP_ADDR, clears every thread-mask bit and marks the warp inactive. The register files are not touched. */ +void Warp::clear() { + PC_ = STARTUP_ADDR; + tmask_.reset(); + active_ = false; +} +/* Runs one instruction: fetches the word at PC_ through the core's icache_fetch, decodes it, records its destination and source registers in the pipeline's used-register masks, then calls execute(). Asserts that at least one thread-mask bit is set. */ +void Warp::step(Pipeline *pipeline) { + assert(tmask_.any()); +/* DPH, DPN and D are debug macros defined elsewhere (presumably level-gated output - confirm). The loop below emits tmask_ from index n-1 down to index 0. */ + DPH(2, "Step: wid=" << id_ << ", PC=0x" << std::hex << PC_ << ", tmask="); + for (int i = 0, n = core_->arch().num_threads(); i < n; ++i) + DPN(2, tmask_[n-i-1]); + DPN(2, "\n"); + + /* Fetch and decode. 
*/ + + Word fetched = core_->icache_fetch(PC_); + auto instr = core_->decoder().decode(fetched, PC_); + + // Update pipeline + pipeline->valid = true; + pipeline->PC = PC_; + pipeline->rdest = instr->getRDest(); + pipeline->rdest_type = instr->getRDType(); + pipeline->used_iregs.reset(); + pipeline->used_fregs.reset(); + pipeline->used_vregs.reset(); +/* Flag the destination register in the mask selected by rdest_type: 1 selects used_iregs, 2 selects used_fregs, 3 selects used_vregs; any other value flags nothing. NOTE(review): the masks are 32-bit bitsets indexed without a range check. */ + switch (pipeline->rdest_type) { + case 1: + pipeline->used_iregs[pipeline->rdest] = 1; + break; + case 2: + pipeline->used_fregs[pipeline->rdest] = 1; + break; + case 3: + pipeline->used_vregs[pipeline->rdest] = 1; + break; + default: + break; + } +/* Flag every source register using the same type-to-mask mapping as the destination. */ + for (int i = 0; i < instr->getNRSrc(); ++i) { + int type = instr->getRSType(i); + int reg = instr->getRSrc(i); + switch (type) { + case 1: + pipeline->used_iregs[reg] = 1; + break; + case 2: + pipeline->used_fregs[reg] = 1; + break; + case 3: + pipeline->used_vregs[reg] = 1; + break; + default: + break; + } + } + + // Execute + this->execute(*instr, pipeline); +/* Debug dump of the integer and floating-point register files. Only thread 0 is shown (iRegFile_[0] and fRegFile_[0]); the per-thread loop is commented out. */ + D(4, "Register state:"); + for (int i = 0; i < core_->arch().num_regs(); ++i) { + DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':'); + // for (int j = 0; j < core_->arch().num_threads(); ++j) { + // // simx64 + // DPN(4, ' ' << std::setfill('0') << std::setw(16) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' '); + // } + DPN(4, ' ' << std::setfill('0') << std::setw(16) << std::hex << iRegFile_[0][i] << std::setfill(' ') << ' '); + DPN(4, ' ' << std::setfill('0') << std::setw(16) << std::hex << fRegFile_[0][i] << std::setfill(' ') << ' '); + DPN(4, std::endl); + } +} \ No newline at end of file diff --git a/sim/simx/warp.h b/sim/simx/warp.h new file mode 100644 index 00000000..181ed973 --- /dev/null +++ b/sim/simx/warp.h @@ -0,0 +1,113 @@ +#ifndef __WARP_H +#define __WARP_H + +#include +#include +#include "types.h" + +namespace vortex { + +class Core; +class Instr; +class Pipeline;/* Record holding a thread mask, a PC and the fallThrough and unanimous flags. This two-argument constructor stores the given PC and sets both flags to false. NOTE(review): presumably the element type of Warp::domStack_ - the template argument is not visible here. */ +struct DomStackEntry { + DomStackEntry(const ThreadMask &tmask, DoubleWord PC) + : 
tmask(tmask) + , PC(PC) + , fallThrough(false) + , unanimous(false) + {} +/* Mask-only overload: PC is set to 0, fallThrough to true and unanimous to false. */ + DomStackEntry(const ThreadMask &tmask) + : tmask(tmask) + , PC(0) + , fallThrough(true) + , unanimous(false) + {} + + ThreadMask tmask; + DoubleWord PC; + bool fallThrough; + bool unanimous; +}; +/* Plain record of four int fields. NOTE(review): the field names look like the RISC-V vector vtype CSR fields - confirm. */ +struct vtype { + int vill; + int vediv; + int vsew; + int vlmul; +}; +/* One warp: an id, an owning Core pointer, a PC, a thread mask, an active flag and the register files. step() runs one instruction and reports it through a Pipeline object. */ +class Warp { +public: + Warp(Core *core, Word id); + + void clear(); + + bool active() const { + return active_; + } + + void activate() { + active_ = true; + } +/* Number of set bits in the thread mask, or 0 while the warp is inactive. */ + std::size_t getActiveThreads() const { + if (active_) + return tmask_.count(); + return 0; + } + + Word id() const { + return id_; + } + + DoubleWord getPC() const { + return PC_; + } + + void setPC(DoubleWord PC) { + PC_ = PC; + } +/* Sets one thread-mask bit, then recomputes active_ so the warp is active only while at least one bit is set. */ + void setTmask(size_t index, bool value) { + tmask_[index] = value; + active_ = tmask_.any(); + } +/* Thread mask as an integer (to_ulong converted to Word), or 0 while the warp is inactive. */ + Word getTmask() const { + if (active_) + return tmask_.to_ulong(); + return 0; + } +/* Integer register reg of thread 0 (iRegFile_[0][reg]), returned as a 32-bit Word. NOTE(review): the element type of iRegFile_ is not visible here; if it is wider than Word the value is truncated - confirm. */ + Word getIRegValue(int reg) const { + return iRegFile_[0][reg]; + } + + void step(Pipeline *); + +private: + + void execute(const Instr &instr, Pipeline *); + + Word id_; + bool active_; + Core *core_; + + DoubleWord PC_; + ThreadMask tmask_; + + // simx64 + std::vector> iRegFile_; + std::vector> fRegFile_; + std::vector> vRegFile_; + std::stack domStack_; +/* NOTE(review): vtype_ and vl_ are not assigned by the Warp constructor or by clear() in warp.cpp. */ + struct vtype vtype_; + int vl_; +}; + +} + +#endif \ No newline at end of file