diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 3b2295fc..07766670 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -18,7 +18,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4 MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -#DEBUG = 1 +#DEBUG=1 #AFU=1 CFLAGS += -fPIC diff --git a/driver/rtlsim/vortex.cpp b/driver/rtlsim/vortex.cpp index 95314f62..5550c821 100644 --- a/driver/rtlsim/vortex.cpp +++ b/driver/rtlsim/vortex.cpp @@ -138,8 +138,11 @@ public: int flush_caches(size_t dev_maddr, size_t size) { if (future_.valid()) { future_.wait(); // ensure prior run completed - } - simulator_.flush_caches(dev_maddr, size); + } + simulator_.flush_caches(dev_maddr, size); + while (simulator_.is_busy()) { + simulator_.step(); + }; return 0; } diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 45033d57..5f0c6e75 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -170,7 +170,7 @@ `define DNUM_REQUESTS `NUM_THREADS // Snoop request tag bits -`define DSNP_TAG_WIDTH `LOG2UP(`L2SNRQ_SIZE) +`define DSNP_TAG_WIDTH ((`NUM_CORES > 1) ? `LOG2UP(`L2SNRQ_SIZE) : `L2SNP_TAG_WIDTH) ////////////////////////// Icache Configurable Knobs ////////////////////////// diff --git a/hw/rtl/Vortex_Cluster.v b/hw/rtl/Vortex_Cluster.v index cafb47a1..88f9dda0 100644 --- a/hw/rtl/Vortex_Cluster.v +++ b/hw/rtl/Vortex_Cluster.v @@ -392,6 +392,7 @@ module Vortex_Cluster #( assign per_core_snp_rsp_ready [(i/2)] = arb_snp_fwdin_ready [(i/2)]; end + if (`NUM_CORES > 1) begin VX_snp_forwarder #( .CACHE_ID (`L2CACHE_ID), .BANK_LINE_SIZE (`L2BANK_LINE_SIZE), @@ -421,6 +422,16 @@ module Vortex_Cluster #( .snp_fwdin_tag (arb_snp_fwdin_tag), .snp_fwdin_ready (arb_snp_fwdin_ready) ); + end else begin + assign arb_snp_fwdout_valid = snp_req_valid; + assign arb_snp_fwdout_addr = snp_req_addr; + assign arb_snp_fwdout_tag = snp_req_tag; + assign snp_req_ready = arb_snp_fwdout_ready; + + assign snp_rsp_valid = arb_snp_fwdin_valid; + assign snp_rsp_tag = arb_snp_fwdin_tag; + assign arb_snp_fwdin_ready = snp_rsp_ready; + end VX_dram_arb #( .BANK_LINE_SIZE (`L2BANK_LINE_SIZE), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 8344694a..b952a64b 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -459,7 +459,7 @@ module VX_bank #( end ) wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1]; - wire from_mrvq_st1e_st2 = from_mrvq_st1e && !is_snp_st1e; + wire from_mrvq_st1e_st2 = from_mrvq_st1e; wire valid_st2; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2; @@ -721,49 +721,49 @@ module VX_bank #( if (NUM_BANKS == 1) begin always_ff @(posedge clk) begin if (core_req_valid && core_req_ready) begin - $display("%t: bank%01d%01d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(core_req_addr), core_req_tag); + $display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(core_req_addr), core_req_tag); end if (core_rsp_valid && core_rsp_ready) begin - $display("%t: bank%01d%01d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); + $display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); end if (dram_fill_req_valid && dram_fill_req_ready) begin - $display("%t: bank%01d%01d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_req_addr)); + $display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_req_addr)); end if (dram_wb_req_valid && dram_wb_req_ready) begin - $display("%t: bank%01d%01d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_wb_req_addr), dram_wb_req_data); + $display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_wb_req_addr), dram_wb_req_data); end if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin - $display("%t: bank%01d%01d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_rsp_addr), dram_fill_rsp_data); + $display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_rsp_addr), dram_fill_rsp_data); end if (snp_req_valid && snp_req_ready) begin - $display("%t: bank%01d%01d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(snp_req_addr), snp_req_tag); + $display("%t: bank%0d-%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(snp_req_addr), snp_req_tag); end if (snp_rsp_valid && snp_rsp_ready) begin - $display("%t: bank%01d%01d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); + $display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); end end end else begin always_ff @(posedge clk) begin if ((|core_req_valid) && core_req_ready) begin - $display("%t: bank%01d%01d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag); + $display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag); end if (core_rsp_valid && core_rsp_ready) begin - $display("%t: bank%01d%01d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); + $display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data); end if (dram_fill_req_valid && dram_fill_req_ready) begin - $display("%t: bank%01d%01d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID)); + $display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID)); end if (dram_wb_req_valid && dram_wb_req_ready) begin - $display("%t: bank%01d%01d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data); + $display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data); end if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin - $display("%t: bank%01d%01d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data); + $display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data); end if (snp_req_valid && snp_req_ready) begin - $display("%t: bank%01d%01d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_tag); + $display("%t: bank%0d-%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_tag); end if (snp_rsp_valid && snp_rsp_ready) begin - $display("%t: bank%01d%01d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); + $display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); end end end diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 59fbcd2f..3d1e48b9 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -208,13 +208,13 @@ module VX_cache #( assign snp_req_addr_qual = snp_req_addr; assign snp_req_tag_qual = snp_req_tag; assign snp_req_ready = snp_req_ready_qual; - end + end - assign dram_req_tag = dram_req_addr; - - assign core_req_ready = (& per_bank_core_req_ready); - assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready); - assign snp_req_ready_qual = (& per_bank_snp_req_ready); + if (NUM_BANKS == 1) begin + assign snp_req_ready_qual = per_bank_snp_req_ready; + end else begin + assign snp_req_ready_qual = per_bank_snp_req_ready[`DRAM_ADDR_BANK(snp_req_addr_qual)]; + end VX_cache_core_req_bank_sel #( .BANK_LINE_SIZE (BANK_LINE_SIZE), @@ -223,11 +223,17 @@ module VX_cache #( .NUM_REQUESTS (NUM_REQUESTS) ) cache_core_req_bank_sel ( .core_req_valid (core_req_valid), + .per_bank_ready (per_bank_core_req_ready), .core_req_addr (core_req_addr), - .per_bank_valid (per_bank_valid) + .per_bank_valid (per_bank_valid), + .core_req_ready (core_req_ready) ); + assign dram_req_tag = dram_req_addr; + assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready); + genvar i; + generate for (i = 0; i < NUM_BANKS; i++) begin wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid; @@ -270,7 +276,7 @@ module VX_cache #( wire curr_bank_core_req_ready; // Core Req - assign curr_bank_core_req_valid = per_bank_valid[i] & {NUM_REQUESTS{core_req_ready}}; + assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQUESTS{core_req_ready}}); assign curr_bank_core_req_addr = core_req_addr; assign curr_bank_core_req_rw = core_req_rw; assign curr_bank_core_req_byteen = core_req_byteen; diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index 0fb28ed5..46efa586 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -11,28 +11,35 @@ module VX_cache_core_req_bank_sel #( // Number of Word requests per cycle {1, 2, 4, 8, ...} parameter NUM_REQUESTS = 0 ) ( - input wire [NUM_REQUESTS-1:0] core_req_valid, + input wire [NUM_REQUESTS-1:0] core_req_valid, `IGNORE_WARNINGS_BEGIN - input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, + input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, `IGNORE_WARNINGS_END - output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid + input wire [NUM_BANKS-1:0] per_bank_ready, + output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid, + output wire core_req_ready ); integer i; if (NUM_BANKS == 1) begin - always @(*) begin + always @(*) begin per_bank_valid = 0; for (i = 0; i < NUM_REQUESTS; i++) begin per_bank_valid[0][i] = core_req_valid[i]; end - end - end else begin + end + assign core_req_ready = per_bank_ready; + end else begin + reg [NUM_BANKS-1:0] per_bank_ready_sel; always @(*) begin per_bank_valid = 0; + per_bank_ready_sel = {NUM_BANKS{1'b1}}; for (i = 0; i < NUM_REQUESTS; i++) begin per_bank_valid[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i]; + per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0; end - end + end + assign core_req_ready = & (per_bank_ready | per_bank_ready_sel); end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 507b1406..53e11575 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -72,10 +72,6 @@ module VX_cache_miss_resrv #( wire enqueue_possible = !miss_resrv_full; wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; -`IGNORE_WARNINGS_BEGIN - wire [31:0] make_ready_push_full; -`IGNORE_WARNINGS_END - reg [MRVQ_SIZE-1:0] make_ready; reg [MRVQ_SIZE-1:0] make_ready_push; reg [MRVQ_SIZE-1:0] valid_address_match; @@ -93,24 +89,21 @@ module VX_cache_miss_resrv #( wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr]; wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr; - assign miss_resrv_valid_st0 = (MRVQ_SIZE != 2) && dequeue_possible; + assign miss_resrv_valid_st0 = dequeue_possible; assign miss_resrv_addr_st0 = addr_table[dequeue_index]; assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, miss_resrv_rw_st0, miss_resrv_byteen_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0} = metadata_table[dequeue_index]; - wire mrvq_push = miss_add && enqueue_possible && !from_mrvq && (MRVQ_SIZE != 2); + wire mrvq_push = miss_add && enqueue_possible && !from_mrvq; wire mrvq_pop = miss_resrv_pop && dequeue_possible; - wire recover_state = miss_add && from_mrvq; wire increment_head = !miss_add && from_mrvq; - wire update_ready = (|make_ready); wire qual_mrvq_init = mrvq_push && mrvq_init_ready_state; - assign make_ready_push_full = ({31'b0, qual_mrvq_init} << enqueue_index); - assign make_ready_push = make_ready_push_full[MRVQ_SIZE-1:0]; + assign make_ready_push = (MRVQ_SIZE'(qual_mrvq_init)) << enqueue_index; always @(posedge clk) begin if (reset) begin @@ -160,12 +153,12 @@ module VX_cache_miss_resrv #( integer j; if (NUM_BANKS == 1) begin always_ff @(posedge clk) begin - if (mrvq_push || mrvq_pop) begin - $write("%t: bank%02d:%01d msrq: push=%b pop=%b", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop); + if (mrvq_push || mrvq_pop || increment_head || recover_state) begin + $write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state); for (j = 0; j < MRVQ_SIZE; j++) begin if (valid_table[j]) begin $write(" "); - if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); + if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); if (~ready_table[j]) $write("!"); $write("addr%0d=%0h", j, {addr_table[j], `BASE_ADDR_BITS'(0)}); end @@ -175,12 +168,12 @@ module VX_cache_miss_resrv #( end end else begin always_ff @(posedge clk) begin - if (mrvq_push || mrvq_pop) begin - $write("%t: bank%02d:%01d msrq: push=%b pop=%b", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop); + if (mrvq_push || mrvq_pop || increment_head || recover_state) begin + $write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state); for (j = 0; j < MRVQ_SIZE; j++) begin if (valid_table[j]) begin $write(" "); - if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); + if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); if (~ready_table[j]) $write("!"); $write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID)); end diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index fe8ac9ba..43b96f8c 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -34,6 +34,8 @@ module VX_snp_forwarder #( input wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdin_tag, output wire [NUM_REQUESTS-1:0] snp_fwdin_ready ); + `STATIC_ASSERT(NUM_REQUESTS > 1, "invalid value"); + reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0]; reg [`REQS_BITS-1:0] fwdin_sel; @@ -43,12 +45,12 @@ module VX_snp_forwarder #( wire fwdin_valid; wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdin_tag; - wire fwdin_ready = snp_rsp_ready; - wire fwdin_taken = fwdin_valid && fwdin_ready; + wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]); + wire fwdin_fire = fwdin_valid && fwdin_ready; wire fwdout_ready = (& snp_fwdout_ready); - assign snp_rsp_valid = fwdin_taken && (1 == pending_cntrs[sfq_read_addr]); // send response + assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]); // send response assign sfq_read_addr = fwdin_tag; @@ -77,7 +79,7 @@ module VX_snp_forwarder #( if (sfq_push) begin pending_cntrs[sfq_write_addr] <= NUM_REQUESTS; end - if (fwdin_taken) begin + if (fwdin_fire) begin pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1; assert(sfq_read_addr == dbg_sfq_write_addr); end @@ -112,16 +114,16 @@ module VX_snp_forwarder #( `ifdef DBG_PRINT_CACHE_SNP always_ff @(posedge clk) begin if (snp_req_valid && snp_req_ready) begin - $display("%t: cache%01d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_tag); + $display("%t: cache%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_tag); end if (snp_fwdout_valid[0] && snp_fwdout_ready[0]) begin - $display("%t: cache%01d snp fwd_out: addr=%0h, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_fwdout_addr[0]), snp_fwdout_tag[0]); + $display("%t: cache%0d snp fwd_out: addr=%0h, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_fwdout_addr[0]), snp_fwdout_tag[0]); end if (fwdin_valid && fwdin_ready) begin - $display("%t: cache%01d snp fwd_in[%01d]: tag=%0h", $time, CACHE_ID, fwdin_sel, fwdin_tag); + $display("%t: cache%0d snp fwd_in[%01d]: tag=%0h", $time, CACHE_ID, fwdin_sel, fwdin_tag); end if (snp_rsp_valid && snp_rsp_ready) begin - $display("%t: cache%01d snp rsp: addr=%0h, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_tag); + $display("%t: cache%0d snp rsp: addr=%0h, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_tag); end end `endif diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 677b3760..9c70faeb 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -11,11 +11,13 @@ double sc_time_stamp() { Simulator::Simulator() { // force random values for unitialized signals - Verilated::randReset(2); + Verilated::randReset(1); ram_ = nullptr; vortex_ = new VVortex_Socket(); + snp_req_active_ = false; + #ifdef VCD_OUTPUT Verilated::traceEverOn(true); trace_ = new VerilatedVcdC; @@ -36,6 +38,214 @@ void Simulator::attach_ram(RAM* ram) { dram_rsp_vec_.clear(); } +void Simulator::reset() { +#ifndef NDEBUG + std::cout << timestamp << ": [sim] reset()" << std::endl; +#endif + + vortex_->reset = 1; + this->step(); + vortex_->reset = 0; + + dram_rsp_vec_.clear(); +} + +void Simulator::step() { + vortex_->clk = 0; + this->eval(); + + vortex_->clk = 1; + this->eval(); + + this->eval_dram_bus(); + this->eval_io_bus(); + this->eval_snp_bus(); +} + +void Simulator::eval() { + vortex_->eval(); +#ifdef VCD_OUTPUT + trace_->dump(timestamp); +#endif + ++timestamp; +} + +void Simulator::eval_dram_bus() { + if (ram_ == nullptr) { + vortex_->dram_req_ready = 0; + return; + } + + // handle DRAM response cycle + int dequeue_index = -1; + for (int i = 0; i < dram_rsp_vec_.size(); i++) { + if (dram_rsp_vec_[i].cycles_left > 0) { + dram_rsp_vec_[i].cycles_left -= 1; + } + if ((dequeue_index == -1) + && (dram_rsp_vec_[i].cycles_left == 0)) { + dequeue_index = i; + } + } + + // handle DRAM response message + if ((dequeue_index != -1) + && vortex_->dram_rsp_ready) { + vortex_->dram_rsp_valid = 1; + memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE); + vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag; + free(dram_rsp_vec_[dequeue_index].data); + dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index); + } else { + vortex_->dram_rsp_valid = 0; + } + + // handle DRAM stalls + bool dram_stalled = false; +#ifdef ENABLE_DRAM_STALLS + if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) { + dram_stalled = true; + } else + if (dram_rsp_vec_.size() >= DRAM_RQ_SIZE) { + dram_stalled = true; + } +#endif + + // handle DRAM requests + if (!dram_stalled) { + if (vortex_->dram_req_valid) { + if (vortex_->dram_req_rw) { + uint64_t byteen = vortex_->dram_req_byteen; + unsigned base_addr = (vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE); + uint8_t* data = (uint8_t*)(vortex_->dram_req_data); + for (int i = 0; i < GLOBAL_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[base_addr + i] = data[i]; + } + } + } else { + dram_req_t dram_req; + dram_req.cycles_left = DRAM_LATENCY; + dram_req.data = (uint8_t*)malloc(GLOBAL_BLOCK_SIZE); + dram_req.tag = vortex_->dram_req_tag; + ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.data); + dram_rsp_vec_.push_back(dram_req); + } + } + } + + vortex_->dram_req_ready = ~dram_stalled; +} + +void Simulator::eval_io_bus() { + if (vortex_->io_req_valid + && vortex_->io_req_rw + && ((vortex_->io_req_addr << 2) == IO_BUS_ADDR_COUT)) { + uint32_t data_write = (uint32_t)vortex_->io_req_data; + char c = (char)data_write; + std::cout << c; + } + vortex_->io_req_ready = 1; + vortex_->io_rsp_valid = 0; +} + +void Simulator::eval_snp_bus() { + if (snp_req_active_) { + if (vortex_->snp_rsp_valid) { + assert(pending_snp_reqs_ > 0); + --pending_snp_reqs_; + #ifdef DBG_PRINT_CACHE_SNP + std::cout << timestamp << ": [sim] snp rsp: tag=" << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs_ << std::endl; + #endif + } + if (vortex_->snp_req_valid && vortex_->snp_req_ready) { + if (snp_req_size_) { + vortex_->snp_req_addr += 1; + vortex_->snp_req_tag += 1; + --snp_req_size_; + ++pending_snp_reqs_; + #ifdef DBG_PRINT_CACHE_SNP + std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl; + #endif + } else { + vortex_->snp_req_valid = 0; + } + } + if (!vortex_->snp_req_valid + && 0 == pending_snp_reqs_) { + snp_req_active_ = false; + } + } else { + vortex_->snp_req_valid = 0; + vortex_->snp_rsp_ready = 0; + } +} + +void Simulator::wait(uint32_t cycles) { + for (int i = 0; i < cycles; ++i) { + this->step(); + } +} + +bool Simulator::is_busy() { + return vortex_->busy || snp_req_active_; +} + +void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { +#ifndef NDEBUG + std::cout << timestamp << ": [sim] flush_caches()" << std::endl; +#endif + if (0 == size) + return; + + snp_req_active_ = true; + snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; + + vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE; + vortex_->snp_req_tag = 0; + vortex_->snp_req_valid = 1; + vortex_->snp_rsp_ready = 1; + + --snp_req_size_; + pending_snp_reqs_ = 1; + + #ifdef DBG_PRINT_CACHE_SNP + std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl; + #endif +} + +bool Simulator::run() { +#ifndef NDEBUG + std::cout << timestamp << ": [sim] run()" << std::endl; +#endif + + // reset the device + this->reset(); + + // execute program + while (vortex_->busy + && !vortex_->ebreak) { + this->step(); + } + + // wait 5 cycles to flush the pipeline + this->wait(5); + + // check riscv-tests PASSED/FAILED status +#if (NUM_CLUSTERS == 1 && NUM_CORES == 1) + int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf; +#else +#if (NUM_CLUSTERS == 1) + int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf; +#else + int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk1__BRA__0__KET____DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf; +#endif +#endif + + return (status == 1); +} + + void Simulator::load_bin(const char* program_file) { if (ram_ == nullptr) return; @@ -123,202 +333,4 @@ void Simulator::load_ihex(const char* program_file) { void Simulator::print_stats(std::ostream& out) { out << std::left; out << std::setw(24) << "# of total cycles:" << std::dec << timestamp/2 << std::endl; -} - -void Simulator::dbus_driver() { - if (ram_ == nullptr) { - vortex_->dram_req_ready = 0; - return; - } - - // handle DRAM response cycle - int dequeue_index = -1; - for (int i = 0; i < dram_rsp_vec_.size(); i++) { - if (dram_rsp_vec_[i].cycles_left > 0) { - dram_rsp_vec_[i].cycles_left -= 1; - } - if ((dequeue_index == -1) - && (dram_rsp_vec_[i].cycles_left == 0)) { - dequeue_index = i; - } - } - - // handle DRAM response message - if ((dequeue_index != -1) - && vortex_->dram_rsp_ready) { - vortex_->dram_rsp_valid = 1; - memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE); - vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag; - free(dram_rsp_vec_[dequeue_index].data); - dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index); - } else { - vortex_->dram_rsp_valid = 0; - } - - // handle DRAM stalls - bool dram_stalled = false; -#ifdef ENABLE_DRAM_STALLS - if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) { - dram_stalled = true; - } else - if (dram_rsp_vec_.size() >= DRAM_RQ_SIZE) { - dram_stalled = true; - } -#endif - - // handle DRAM requests - if (!dram_stalled) { - if (vortex_->dram_req_valid) { - if (vortex_->dram_req_rw) { - uint64_t byteen = vortex_->dram_req_byteen; - unsigned base_addr = (vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE); - uint8_t* data = (uint8_t*)(vortex_->dram_req_data); - for (int i = 0; i < GLOBAL_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - } else { - dram_req_t dram_req; - dram_req.cycles_left = DRAM_LATENCY; - dram_req.data = (uint8_t*)malloc(GLOBAL_BLOCK_SIZE); - dram_req.tag = vortex_->dram_req_tag; - ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.data); - dram_rsp_vec_.push_back(dram_req); - } - } - } - - vortex_->dram_req_ready = ~dram_stalled; -} - -void Simulator::io_driver() { - if (vortex_->io_req_valid - && vortex_->io_req_rw - && ((vortex_->io_req_addr << 2) == IO_BUS_ADDR_COUT)) { - uint32_t data_write = (uint32_t)vortex_->io_req_data; - char c = (char)data_write; - std::cout << c; - } - vortex_->io_req_ready = 1; - vortex_->io_rsp_valid = 0; -} - -void Simulator::reset() { -#ifndef NDEBUG - std::cout << timestamp << ": [sim] reset()" << std::endl; -#endif - vortex_->reset = 1; - this->step(); - vortex_->reset = 0; - - dram_rsp_vec_.clear(); -} - -void Simulator::step() { - vortex_->clk = 0; - this->eval(); - - vortex_->clk = 1; - this->eval(); - - dbus_driver(); - io_driver(); -} - -void Simulator::eval() { - vortex_->eval(); -#ifdef VCD_OUTPUT - trace_->dump(timestamp); -#endif - ++timestamp; -} - -void Simulator::wait(uint32_t cycles) { - for (int i = 0; i < cycles; ++i) { - this->step(); - } -} - -bool Simulator::is_busy() { - return vortex_->busy; -} - -void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { -#ifndef NDEBUG - std::cout << timestamp << ": [sim] flush_caches()" << std::endl; -#endif - // align address to LLC block boundaries - auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE; - auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; - - // submit snoop requests for the needed blocks - vortex_->snp_req_addr = aligned_addr_start; - vortex_->snp_req_tag = 0; - vortex_->snp_req_valid = 1; - vortex_->snp_rsp_ready = 1; - - int pending_snp_reqs = 1; - -#ifdef DBG_PRINT_CACHE_SNP - std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << (aligned_addr_end - vortex_->snp_req_addr - 1) << std::endl; -#endif - - for (;;) { - this->step(); - if (vortex_->snp_rsp_valid) { - assert(pending_snp_reqs > 0); - --pending_snp_reqs; - #ifdef DBG_PRINT_CACHE_SNP - std::cout << timestamp << ": [sim] snp rsp: tag=" << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs << std::endl; - #endif - } - if (vortex_->snp_req_valid && vortex_->snp_req_ready) { - if (vortex_->snp_req_addr + 1 < aligned_addr_end) { - vortex_->snp_req_addr += 1; - vortex_->snp_req_tag += 1; - ++pending_snp_reqs; - #ifdef DBG_PRINT_CACHE_SNP - std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << (aligned_addr_end - vortex_->snp_req_addr - 1) << std::endl; - #endif - } else { - vortex_->snp_req_valid = 0; - } - } - if (!vortex_->snp_req_valid - && 0 == pending_snp_reqs) { - break; - } - } -} - -bool Simulator::run() { -#ifndef NDEBUG - std::cout << timestamp << ": [sim] run()" << std::endl; -#endif - - // reset the device - this->reset(); - - // execute program - while (vortex_->busy - && !vortex_->ebreak) { - this->step(); - } - - // wait 5 cycles to flush the pipeline - this->wait(5); - - // check riscv-tests PASSED/FAILED status -#if (NUM_CLUSTERS == 1 && NUM_CORES == 1) - int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf; -#else -#if (NUM_CLUSTERS == 1) - int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf; -#else - int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk1__BRA__0__KET____DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf; -#endif -#endif - - return (status == 1); } \ No newline at end of file diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index d68ae04d..c3467494 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -35,6 +35,7 @@ public: void load_ihex(const char* program_file); bool is_busy(); + void reset(); void step(); void wait(uint32_t cycles); @@ -48,14 +49,19 @@ public: private: void eval(); - void dbus_driver(); - void io_driver(); + + void eval_dram_bus(); + void eval_io_bus(); + void eval_snp_bus(); std::vector dram_rsp_vec_; + + uint32_t snp_req_active_; + uint32_t snp_req_size_; + uint32_t pending_snp_reqs_; RAM *ram_; VVortex_Socket *vortex_; - bool enable_; #ifdef VCD_OUTPUT VerilatedVcdC *trace_; #endif