From f717ff56da0f6f011572211875fc74f45c0a1f23 Mon Sep 17 00:00:00 2001 From: felsabbagh3 Date: Mon, 11 Nov 2019 00:37:34 -0500 Subject: [PATCH] simX within 30% cycles of the RTL for matAdd --- simX/Makefile | 5 +- simX/cache_simX.v | 17 +- simX/core.cpp | 584 +++++++++++++++++++++++++++++++++---- simX/include/core.h | 26 +- simX/include/instruction.h | 4 +- simX/include/trace.h | 3 + simX/instruction.cpp | 20 +- simX/simX.cpp | 18 ++ simX/test_riscv.sh | 1 + 9 files changed, 605 insertions(+), 73 deletions(-) diff --git a/simX/Makefile b/simX/Makefile index 406dfe22..baa47df9 100644 --- a/simX/Makefile +++ b/simX/Makefile @@ -13,8 +13,9 @@ LIB= CF=-CFLAGS '-std=c++11 -fPIC -O3' LIGHTW=-Wno-UNOPTFLAT -Wno-BLKLOOPINIT - +DEB=--trace --prof-cfuncs -DVL_DEBUG=1 EXE=--exe $(LIB_OBJS) + all: simX # simX: @@ -22,7 +23,7 @@ all: simX simX: - verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(LIGHTW) + verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(LIGHTW) $(DEB) (cd obj_dir && make -j -f Vcache_simX.mk OPT='-DVL_DEBUG' VL_DEBUG=1 DVL_DEBUG=1) clean: diff --git a/simX/cache_simX.v b/simX/cache_simX.v index 83c2d388..249d7d2e 100644 --- a/simX/cache_simX.v +++ b/simX/cache_simX.v @@ -93,25 +93,26 @@ module cache_simX ( always @(posedge clk, posedge reset) begin if (reset) begin - icache_i_m_ready <= 0; - dcache_i_m_ready <= 0; + icache_i_m_ready = 0; + dcache_i_m_ready = 0; end else begin if (VX_dram_req_rsp_icache.o_m_valid) begin - icache_i_m_ready <= 1; + icache_i_m_ready = 1; + // $display("cache_simX.v: setting icache_i_m_ready = %d", icache_i_m_ready); end else if (icache_i_m_ready) begin - icache_i_m_ready <= 0; + icache_i_m_ready = 0; end else begin - icache_i_m_ready <= 0; + icache_i_m_ready = 0; end if (VX_dram_req_rsp.o_m_valid) begin - dcache_i_m_ready <= 1; + dcache_i_m_ready = 1; end else if (dcache_i_m_ready) begin - dcache_i_m_ready <= 0; + dcache_i_m_ready = 0; end else begin - dcache_i_m_ready <= 0; + dcache_i_m_ready = 0; end end diff --git a/simX/core.cpp b/simX/core.cpp index 9da7dfaf..4d7dc466 100644 --- a/simX/core.cpp +++ b/simX/core.cpp @@ -20,9 +20,74 @@ #include "include/qsim-harp.h" #endif + +#define NO_MEM_READ 7 +#define LB_MEM_READ 0 +#define LH_MEM_READ 1 +#define LW_MEM_READ 2 +#define LBU_MEM_READ 4 +#define LHU_MEM_READ 5 + + +#define NO_MEM_WRITE 7 +#define SB_MEM_WRITE 0 +#define SH_MEM_WRITE 1 +#define SW_MEM_WRITE 2 + +#define INIT_TRACE(trace_inst) \ + trace_inst.valid_inst = false; \ + trace_inst.pc = 0; \ + trace_inst.wid = schedule_w; \ + trace_inst.rs1 = -1; \ + trace_inst.rs2 = -1; \ + trace_inst.rd = -1; \ + trace_inst.is_lw = false; \ + trace_inst.is_sw = false; \ + trace_inst.mem_addresses = new unsigned[a.getNThds()]; \ + for (int tid = 0; tid < a.getNThds(); tid++) trace_inst.mem_addresses[tid] = 0xdeadbeef; \ + trace_inst.mem_stall_cycles = 0; \ + trace_inst.fetch_stall_cycles = 0; \ + trace_inst.stall_warp = false; \ + trace_inst.wspawn = false; \ + trace_inst.stalled = false; + +#define CPY_TRACE(drain, source) \ + drain.valid_inst = source.valid_inst; \ + drain.pc = source.pc; \ + drain.wid = source.wid; \ + drain.rs1 = source.rs1; \ + drain.rs2 = source.rs2; \ + drain.rd = source.rd; \ + drain.is_lw = source.is_lw; \ + drain.is_sw = source.is_sw; \ + for (int tid = 0; tid < a.getNThds(); tid++) drain.mem_addresses[tid] = source.mem_addresses[tid]; \ + drain.mem_stall_cycles = source.mem_stall_cycles; \ + drain.fetch_stall_cycles = source.fetch_stall_cycles; \ + drain.stall_warp = source.stall_warp; \ + drain.wspawn = source.wspawn; \ + drain.stalled = false; + using namespace Harp; using namespace std; + +void printTrace(trace_inst_t * trace, const char * stage_name) +{ + cout << "********************************** " << stage_name << " *********************************\n"; + cout << "valid: " << trace->valid_inst << '\n'; + cout << "PC: " << hex << trace->pc << dec << '\n'; + cout << "wid: " << trace->wid << '\n'; + cout << "rd: " << trace->rd << "\trs1: " << trace->rs1 << "\trs2: " << trace->rs2 << '\n'; + cout << "is_lw: " << trace->is_lw << '\n'; + cout << "is_sw: " << trace->is_sw << '\n'; + cout << "fetch_stall_cycles: " << trace->fetch_stall_cycles << '\n'; + cout << "mem_stall_cycles: " << trace->mem_stall_cycles << '\n'; + + cout << "stall_warp: " << trace->stall_warp << '\n'; + cout << "wspawn: " << trace->wspawn << '\n'; + cout << "stalled: " << trace->stalled << '\n'; +} + #ifdef EMU_INSTRUMENTATION void Harp::reg_doRead(Word cpuId, Word regNum) { Harp::OSDomain::osDomain->do_reg(cpuId, regNum, 8, true); @@ -34,10 +99,45 @@ void Harp::reg_doWrite(Word cpuId, Word regNum) { #endif Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id): - a(a), iDec(d), mem(mem), steps(0) + a(a), iDec(d), mem(mem), steps(4) { + release_warp = false; + foundSchedule = true; + schedule_w = 0; + INIT_TRACE(inst_in_fetch); + INIT_TRACE(inst_in_decode); + INIT_TRACE(inst_in_scheduler); + INIT_TRACE(inst_in_exe); + INIT_TRACE(inst_in_lsu); + INIT_TRACE(inst_in_wb); + + for (int i = 0; i < 32; i++) + { + stallWarp[i] = false; + for (int j = 0; j < 32; j++) + { + renameTable[i][j] = true; + } + } + cache_simulator = new Vcache_simX; + + m_trace = new VerilatedVcdC; + cache_simulator->trace(m_trace, 99); + m_trace->open("simXtrace.vcd"); + + cache_simulator->reset = 1; + cache_simulator->clk = 0; + cache_simulator->eval(); + // m_trace->dump(10); + cache_simulator->reset = 1; + cache_simulator->clk = 1; + cache_simulator->eval(); + // m_trace->dump(11); + cache_simulator->reset = 0; + cache_simulator->clk = 0; + for (unsigned i = 0; i < a.getNWarps(); ++i) w.push_back(Warp(this, i)); @@ -51,54 +151,270 @@ bool Core::interrupt(Word r0) { void Core::step() { + cout << "\n\n\n------------------------------------------------------\n"; + + steps++; + cout << "CYCLE: " << steps << '\n'; + + cout << "Stalled Warps:\n"; + for (int widd = 0; widd < a.getNWarps(); widd++) + { + cout << stallWarp[widd] << " "; + } + cout << '\n'; + + // cout << "Rename table\n"; + // for (int regii = 0; regii < 32; regii++) + // { + // cout << regii << ": " << renameTable[0][regii] << '\n'; + // } + + cout << '\n'; + + this->writeback(); + this->load_store(); + this->execute_unit(); + this->scheduler(); + this->decode(); this->fetch(); + + if (release_warp) + { + release_warp = false; + stallWarp[release_warp_num] = false; + } +} + +void Core::getCacheDelays(trace_inst_t * trace_inst) +{ + static int curr_cycle = 0; + if (trace_inst->valid_inst) + { + + bool in_dcache_in_valid[a.getNThds()]; + unsigned in_dcache_in_address[a.getNThds()]; + + unsigned in_dcache_mem_read; + unsigned in_dcache_mem_write; + if (trace_inst->is_lw) + { + in_dcache_mem_read = LW_MEM_READ; + in_dcache_mem_write = NO_MEM_WRITE; + } + else if (trace_inst->is_sw) + { + in_dcache_mem_read = NO_MEM_READ; + in_dcache_mem_write = SW_MEM_WRITE; + } + else + { + in_dcache_mem_read = NO_MEM_READ; + in_dcache_mem_write = NO_MEM_WRITE; + } + + for (int j = 0; j < a.getNThds(); j++) + { + if ((w[trace_inst->wid].tmask[j]) && (trace_inst->is_sw || trace_inst->is_lw)) + { + in_dcache_in_valid[j] = true; + in_dcache_in_address[j] = trace_inst->mem_addresses[j]; + } + else + { + in_dcache_in_valid[j] = false; + in_dcache_in_address[j] = 0xdeadbeef; + } + } + + cache_simulator->clk = 1; + cache_simulator->eval(); + m_trace->dump(2*curr_cycle); + + cache_simulator->in_icache_pc_addr = trace_inst->pc; + cache_simulator->in_icache_valid_pc_addr = 1; + + // DCache start + cache_simulator->in_dcache_mem_read = in_dcache_mem_read; + cache_simulator->in_dcache_mem_write = in_dcache_mem_write; + for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) + { + cache_simulator->in_dcache_in_valid[cur_t] = in_dcache_in_valid[cur_t]; + cache_simulator->in_dcache_in_address[cur_t] = in_dcache_in_address[cur_t]; + } + // DCache end + cache_simulator->clk = 0; + cache_simulator->eval(); + m_trace->dump(2*curr_cycle+1); + + curr_cycle++; + + while((cache_simulator->out_icache_stall || cache_simulator->out_dcache_stall)) + { + + ////////// Feed input + if (cache_simulator->out_icache_stall) + { + cache_simulator->in_icache_pc_addr = trace_inst->pc; + cache_simulator->in_icache_valid_pc_addr = 1; + trace_inst->fetch_stall_cycles++; + } + else + { + cache_simulator->in_icache_valid_pc_addr = 0; + } + + if (cache_simulator->out_dcache_stall) + { + cache_simulator->in_dcache_mem_read = in_dcache_mem_read; + cache_simulator->in_dcache_mem_write = in_dcache_mem_write; + for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) + { + cache_simulator->in_dcache_in_valid[cur_t] = in_dcache_in_valid[cur_t]; + cache_simulator->in_dcache_in_address[cur_t] = in_dcache_in_address[cur_t]; + } + trace_inst->mem_stall_cycles++; + } + else + { + cache_simulator->in_dcache_mem_read = NO_MEM_READ; + cache_simulator->in_dcache_mem_write = NO_MEM_WRITE; + for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) + { + cache_simulator->in_dcache_in_valid[cur_t] = 0; + } + } + + cache_simulator->clk = 1; + cache_simulator->eval(); + m_trace->dump(2*curr_cycle); + + //////// Feed input + if (cache_simulator->out_icache_stall) + { + cache_simulator->in_icache_pc_addr = trace_inst->pc; + cache_simulator->in_icache_valid_pc_addr = 1; + } + else + { + cache_simulator->in_icache_valid_pc_addr = 0; + } + + if (cache_simulator->out_dcache_stall) + { + cache_simulator->in_dcache_mem_read = in_dcache_mem_read; + cache_simulator->in_dcache_mem_write = in_dcache_mem_write; + for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) + { + cache_simulator->in_dcache_in_valid[cur_t] = in_dcache_in_valid[cur_t]; + cache_simulator->in_dcache_in_address[cur_t] = in_dcache_in_address[cur_t]; + } + } + else + { + cache_simulator->in_dcache_mem_read = NO_MEM_READ; + cache_simulator->in_dcache_mem_write = NO_MEM_WRITE; + for (int cur_t = 0; cur_t < a.getNThds(); cur_t++) + { + cache_simulator->in_dcache_in_valid[cur_t] = 0; + } + } + + cache_simulator->clk = 0; + cache_simulator->eval(); + m_trace->dump(2*curr_cycle+1); + + + curr_cycle++; + + } + + } +} + +void Core::warpScheduler() +{ + int numSteps = 0; + bool cont; + + do + { + numSteps++; + schedule_w = (schedule_w+1) % w.size(); + + bool has_active_threads = (w[schedule_w].activeThreads > 0); + bool stalled = stallWarp[schedule_w]; + + cont = ((!has_active_threads) || (stalled)) && (numSteps <= w.size()); + + // cout << "&&&&&&&WID: " << schedule_w << '\n'; + // cout << "activeThreads: " << w[schedule_w].activeThreads << "\t!has_active_threads: " << (!has_active_threads) << '\n'; + + // cout << "stalled: " << stalled << '\n'; + // cout << "numSteps: " << numSteps << " CONT: " << cont << '\n'; + + } while (cont); + + if (numSteps > w.size()) + { + this->foundSchedule = false; + } + else + { + this->foundSchedule = true; + } + } void Core::fetch() { - ++steps; #ifdef PRINT_ACTIVE_THREADS cout << endl << "Threads:"; #endif + // D(-1, "Found schedule: " << foundSchedule); - for (unsigned i = 0; i < w.size(); ++i) { - if (w[i].activeThreads) { + if ((!inst_in_scheduler.stalled) && (inst_in_fetch.fetch_stall_cycles == 0)) + { + // CPY_TRACE(inst_in_decode, inst_in_fetch); + // if (w[schedule_w].activeThreads) + { - trace_inst_t trace_inst; - trace_inst.valid_inst = false; - trace_inst.pc = 0; - trace_inst.wid = i; - trace_inst.rs1 = -1; - trace_inst.rs2 = -1; - trace_inst.rd = -1; - trace_inst.is_lw = false; - trace_inst.is_sw = false; - trace_inst.mem_addresses = new unsigned[a.getNThds()]; - trace_inst.mem_stall_cycles = 0; - trace_inst.fetch_stall_cycles = 0; - trace_inst.stall_warp = false; - - - D(3, "Core step stepping warp " << i << '[' << w[i].activeThreads << ']'); - w[i].step(&trace_inst); - D(3, "Now " << w[i].activeThreads << " active threads in " << i); - - D(-1, "********************************"); - D(-1, "*** valid: " << trace_inst.valid_inst << " pc: " << hex << trace_inst.pc << dec << " rs1..rs2..rd " << trace_inst.rs1 << ".." << trace_inst.rs2 << ".." << trace_inst.rd << "\n"); - D(-1, "********************************"); + INIT_TRACE(inst_in_fetch); + if (foundSchedule) + { + D(3, "Core step stepping warp " << schedule_w << '[' << w[schedule_w].activeThreads << ']'); + w[schedule_w].step(&inst_in_fetch); + D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w); + + this->getCacheDelays(&inst_in_fetch); + if (inst_in_fetch.stall_warp) + { + stallWarp[inst_in_fetch.wid] = true; + } + } + warpScheduler(); + } } + else + { + inst_in_fetch.stalled = false; + if (inst_in_fetch.fetch_stall_cycles > 0) inst_in_fetch.fetch_stall_cycles--; + } + + printTrace(&inst_in_fetch, "Fetch"); - #ifdef PRINT_ACTIVE_THREADS - for (unsigned j = 0; j < w[i].tmask.size(); ++j) { - if (w[i].activeThreads > j && w[i].tmask[j]) cout << " 1"; - else cout << " 0"; - if (j != w[i].tmask.size()-1 || i != w.size()-1) cout << ','; - } - #endif - } + // #ifdef PRINT_ACTIVE_THREADS + // for (unsigned j = 0; j < w[schedule_w].tmask.size(); ++j) { + // if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j]) cout << " 1"; + // else cout << " 0"; + // if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1) cout << ','; + // } + // #endif + + + #ifdef PRINT_ACTIVE_THREADS cout << endl; #endif @@ -107,31 +423,198 @@ void Core::fetch() void Core::decode() { + + + if ((inst_in_fetch.fetch_stall_cycles == 0) && !inst_in_scheduler.stalled) + { + CPY_TRACE(inst_in_decode, inst_in_fetch); + INIT_TRACE(inst_in_fetch); + } + + printTrace(&inst_in_decode, "Decode"); } void Core::scheduler() { -} - -void Core::gpr_read() -{ - -} - -void Core::execute_unit() -{ + if (!inst_in_scheduler.stalled) + { + CPY_TRACE(inst_in_scheduler, inst_in_decode); + INIT_TRACE(inst_in_decode); + } + printTrace(&inst_in_scheduler, "scheduler"); } void Core::load_store() { + bool do_nothing = false; + if ((inst_in_lsu.mem_stall_cycles > 0) || (inst_in_lsu.stalled)) + { + // LSU currently busy + if ((inst_in_scheduler.is_lw || inst_in_scheduler.is_sw)) + { + inst_in_scheduler.stalled = true; + } + do_nothing = true; + } + else + { + // LSU not busy + if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw) + { + // Scheduler has LSU inst + bool scheduler_srcs_ready = true; + if (inst_in_scheduler.rs1 > 0) + { + scheduler_srcs_ready = scheduler_srcs_ready && renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs1]; + } + + if (inst_in_scheduler.rs2 > 0) + { + scheduler_srcs_ready = scheduler_srcs_ready && renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2]; + } + + if (scheduler_srcs_ready) + { + if (inst_in_scheduler.rd != -1) renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false; + CPY_TRACE(inst_in_lsu, inst_in_scheduler); + INIT_TRACE(inst_in_scheduler); + } + else + { + inst_in_scheduler.stalled = true; + // INIT_TRACE(inst_in_lsu); + do_nothing = true; + } + } + else + { + // INIT_TRACE(inst_in_lsu); + do_nothing = true; + } + } + + if (inst_in_lsu.mem_stall_cycles > 0) inst_in_lsu.mem_stall_cycles--; + + printTrace(&inst_in_lsu, "LSU"); +} + +void Core::execute_unit() +{ + // cout << "$$$$$$$$$$$$$$$$$$$ EXE START\n"; + bool do_nothing = false; + // EXEC is always not busy + if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw) + { + // Not an execute instruction + // INIT_TRACE(inst_in_exe); + do_nothing = true; + } + else + { + bool scheduler_srcs_ready = true; + if (inst_in_scheduler.rs1 > 0) + { + scheduler_srcs_ready = scheduler_srcs_ready && renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs1]; + // cout << "Rename RS1: " << inst_in_scheduler.rs1 << " is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs1] << " wid: " << inst_in_scheduler.wid << '\n'; + } + + if (inst_in_scheduler.rs2 > 0) + { + scheduler_srcs_ready = scheduler_srcs_ready && renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2]; + // cout << "Rename RS2: " << inst_in_scheduler.rs1 << " is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2] << " wid: " << inst_in_scheduler.wid << '\n'; + } + + if (scheduler_srcs_ready) + { + if (inst_in_scheduler.rd != -1) { + // cout << "rename setting rd: " << inst_in_scheduler.rd << " to not useabel wid: " << inst_in_scheduler.wid << '\n'; + renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false; + } + CPY_TRACE(inst_in_exe, inst_in_scheduler); + INIT_TRACE(inst_in_scheduler); + } + else + { + cout << "&&&&&&&&&&&&&&&&&&&&&&&& EXECUTE SRCS NOT READY\n"; + inst_in_scheduler.stalled = true; + // INIT_TRACE(inst_in_exe); + do_nothing = true; + } + } + + // if (!do_nothing) + // { + + // } + + printTrace(&inst_in_exe, "execute_unit"); + // INIT_TRACE(inst_in_exe); +} + +void Core::writeback() +{ + + + if (inst_in_wb.rd > 0) renameTable[inst_in_wb.wid][inst_in_wb.rd] = true; + + if (inst_in_wb.stall_warp) + { + // stallWarp[inst_in_wb.wid] = false; + release_warp = true; + release_warp_num = inst_in_wb.wid; + } + + + INIT_TRACE(inst_in_wb); + + bool serviced_exe = false; + bool serviced_mem = false; + if ((inst_in_exe.rd > 0) || (inst_in_exe.stall_warp)) + { + CPY_TRACE(inst_in_wb, inst_in_exe); + INIT_TRACE(inst_in_exe); + + serviced_exe = true; + // cout << "WRITEBACK SERVICED EXE\n"; + } + + if ((inst_in_lsu.rd > 0) && (inst_in_lsu.mem_stall_cycles == 0)) + { + if (serviced_exe) + { + cout << "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used\n"; + inst_in_lsu.stalled = true; + } + else + { + serviced_mem = true; + CPY_TRACE(inst_in_wb, inst_in_lsu); + INIT_TRACE(inst_in_lsu); + + } + } + + // if (!serviced_exe && !serviced_mem) INIT_TRACE(inst_in_wb); + + printTrace(&inst_in_wb, "Writeback"); } + bool Core::running() const { + bool stages_have_valid = inst_in_fetch.valid_inst || inst_in_decode.valid_inst || inst_in_scheduler.valid_inst || + inst_in_lsu.valid_inst || inst_in_exe.valid_inst || inst_in_wb.valid_inst; + + if (stages_have_valid) return true; + for (unsigned i = 0; i < w.size(); ++i) - if (w[i].running()) return true; + if (w[i].running()) + { + cout << "Warp ID " << i << " is running\n"; + return true; + } return false; } @@ -140,8 +623,7 @@ void Core::printStats() const { for (unsigned i = 0; i < w.size(); ++i) insts += w[i].insts; - cout << "Total steps: " << steps << endl; - cout << "Total insts: " << insts << endl; + cerr << "Total steps: " << steps << endl; for (unsigned i = 0; i < w.size(); ++i) { cout << "=== Warp " << i << " ===" << endl; w[i].printStats(); @@ -190,7 +672,7 @@ void Warp::step(trace_inst_t * trace_inst) { if (activeThreads == 0) return; - ++steps; + // ++steps; D(3, "in step pc=0x" << hex << pc); @@ -219,7 +701,7 @@ void Warp::step(trace_inst_t * trace_inst) { // Execute - inst->executeOn(*this); + inst->executeOn(*this, trace_inst); // At Debug Level 3, print debug info after each instruction. @@ -274,13 +756,13 @@ bool Warp::interrupt(Word r0) { } void Warp::printStats() const { - cerr << "Steps : " << steps << endl + cout << "Steps : " << steps << endl << "Insts : " << insts << endl << "Loads : " << loads << endl << "Stores: " << stores << endl; unsigned const grade = reg[0][28]; - if (grade == 1) cerr << "GRADE: PASSED\n"; - else cerr << "GRADE: FAILED " << (grade >> 1) << "\n"; + if (grade == 1) cout << "GRADE: PASSED\n"; + else cout << "GRADE: FAILED " << (grade >> 1) << "\n"; } diff --git a/simX/include/core.h b/simX/include/core.h index 46150056..b9b6c5ea 100644 --- a/simX/include/core.h +++ b/simX/include/core.h @@ -19,9 +19,9 @@ #include "Vcache_simX.h" #include "verilated.h" -#ifdef VCD_OUTPUT +// #ifdef VCD_OUTPUT #include -#endif +// #endif #include "trace.h" @@ -92,17 +92,34 @@ namespace Harp { public: Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id=0); - Vcache_simX * cache_simulator; + Vcache_simX * cache_simulator; + VerilatedVcdC * m_trace; + + bool renameTable[32][32]; + bool stallWarp[32]; + bool foundSchedule; + + trace_inst_t inst_in_fetch; + trace_inst_t inst_in_decode; + trace_inst_t inst_in_scheduler; + trace_inst_t inst_in_exe; + trace_inst_t inst_in_lsu; + trace_inst_t inst_in_wb; + + bool release_warp; + int release_warp_num; bool interrupt(Word r0); bool running() const; + void getCacheDelays(trace_inst_t *); + void warpScheduler(); void fetch(); void decode(); void scheduler(); - void gpr_read(); void execute_unit(); void load_store(); + void writeback(); void step(); @@ -117,6 +134,7 @@ namespace Harp { unsigned long steps; std::vector w; std::map > b; // Barriers + int schedule_w; }; class Warp { diff --git a/simX/include/instruction.h b/simX/include/instruction.h index 8d743f56..efe99769 100644 --- a/simX/include/instruction.h +++ b/simX/include/instruction.h @@ -8,7 +8,7 @@ #include #include "types.h" - +#include "trace.h" namespace Harp { class Warp; class Ref; @@ -79,7 +79,7 @@ namespace Harp { { } - void executeOn(Warp &warp); + void executeOn(Warp &warp, trace_inst_t *); friend std::ostream &operator<<(std::ostream &, Instruction &); /* Setters used to "craft" the instruction. */ diff --git a/simX/include/trace.h b/simX/include/trace.h index aee78a11..4cbc27ed 100644 --- a/simX/include/trace.h +++ b/simX/include/trace.h @@ -28,6 +28,9 @@ namespace Harp { // Instruction execute bool stall_warp; + bool wspawn; + + bool stalled; } trace_inst_t; } \ No newline at end of file diff --git a/simX/instruction.cpp b/simX/instruction.cpp index 5e6c2b0f..b1354a98 100644 --- a/simX/instruction.cpp +++ b/simX/instruction.cpp @@ -78,7 +78,7 @@ Word signExt(Word w, Size bit, Word mask) { return w; } -void Instruction::executeOn(Warp &c) { +void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) { D(3, "Begin instruction execute."); /* If I try to execute a privileged instruction in user mode, throw an @@ -357,14 +357,14 @@ void Instruction::executeOn(Warp &c) { break; case L_INST: //std::cout << "L_INST\n"; + memAddr = ((reg[rsrc[0]] + immsrc) & 0xFFFFFFFC); shift_by = ((reg[rsrc[0]] + immsrc) & 0x00000003) * 8; data_read = c.core->mem.read(memAddr, c.supervisorMode); + trace_inst->is_lw = true; + trace_inst->mem_addresses[t] = memAddr; // //std::cout < data_read: " << data_read << "\n"; -#ifdef EMU_INSTRUMENTATION - Harp::OSDomain::osDomain-> - do_mem(0, memAddr, c.core->mem.virtToPhys(memAddr), 8, true); -#endif + switch (func3) { @@ -484,6 +484,8 @@ void Instruction::executeOn(Warp &c) { ++c.stores; memAddr = reg[rsrc[0]] + immsrc; std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n"; + trace_inst->is_sw = true; + trace_inst->mem_addresses[t] = memAddr; // //std::cout << "FUNC3: " << func3 << "\n"; if ((memAddr == 0x00010000) && (t == 0)) { @@ -517,6 +519,7 @@ void Instruction::executeOn(Warp &c) { break; case B_INST: //std::cout << "B_INST\n"; + trace_inst->stall_warp = true; switch (func3) { case 0: @@ -579,6 +582,7 @@ void Instruction::executeOn(Warp &c) { break; case JAL_INST: //std::cout << "JAL_INST\n"; + trace_inst->stall_warp = true; if (!pcSet) nextPc = (c.pc - 4) + immsrc; if (!pcSet) {/*std::cout << "JAL... SETTING PC: " << nextPc << "\n"; */} if (rdest != 0) @@ -589,6 +593,7 @@ void Instruction::executeOn(Warp &c) { break; case JALR_INST: std::cout << "JALR_INST\n"; + trace_inst->stall_warp = true; if (!pcSet) nextPc = reg[rsrc[0]] + immsrc; if (!pcSet) {/*std::cout << "JALR... SETTING PC: " << nextPc << "\n";*/ } if (rdest != 0) @@ -704,6 +709,7 @@ void Instruction::executeOn(Warp &c) { case 1: // WSPAWN std::cout << "WSPAWN\n"; + trace_inst->wspawn = true; if (sjOnce) { sjOnce = false; @@ -745,6 +751,7 @@ void Instruction::executeOn(Warp &c) { { // SPLIT //std::cout << "SPLIT\n"; + trace_inst->stall_warp = true; if (sjOnce) { sjOnce = false; @@ -812,12 +819,13 @@ void Instruction::executeOn(Warp &c) { } break; case 4: + trace_inst->stall_warp = true; // is_barrier break; case 0: // TMC //std::cout << "JALRS\n"; - + trace_inst->stall_warp = true; nextActiveThreads = reg[rsrc[0]]; { for (int ff = 0; ff < c.tmask.size(); ff++) diff --git a/simX/simX.cpp b/simX/simX.cpp index 3a576f81..2e853255 100644 --- a/simX/simX.cpp +++ b/simX/simX.cpp @@ -104,6 +104,24 @@ int emu_main(int argc, char **argv) { // core.w[0].pc = 0x8000007c; // If I want to start at a specific location std::cout << "ABOUT TO START\n"; + // bool count_down = false; + // int cycles_left; + // while (!count_down || (count_down && (cycles_left == 0))) + // { + + // if (count_down) + // { + // cycles_left--; + // } + + // console.poll(); + // core.step(); + // bool run = core.running(); + // if (!run) + // { + // count_down = true; + // } + // } while (core.running()) { console.poll(); core.step(); } if (showStats) core.printStats(); diff --git a/simX/test_riscv.sh b/simX/test_riscv.sh index 54ff7147..e181a981 100644 --- a/simX/test_riscv.sh +++ b/simX/test_riscv.sh @@ -3,4 +3,5 @@ echo start > results.txt # echo ../kernel/vortex_test.hex make clean make +echo "Fasten your seatbelts ladies and gentelmen!!" cd obj_dir && ./Vcache_simX -E -a rv32i --core ../../runtime/mains/dev/vx_dev_main.hex -s -b 1> emulator.debug