diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 62dcea69..f04301d6 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -938,7 +938,7 @@ always @(posedge clk) begin vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next); snp_req_ctr <= snp_req_ctr_next; `ifdef DBG_PRINT_OPAE - $display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next)); + $display("%t: AFU Snp Req: addr=%0h, tag=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next)); `endif end @@ -947,7 +947,7 @@ always @(posedge clk) begin assert(snp_rsp_ctr != 0); snp_rsp_ctr <= snp_rsp_ctr_next; `ifdef DBG_PRINT_OPAE - $display("%t: AFU Snp Rsp: tag=%0d, rem=%0d", $time, vx_snp_rsp_tag, snp_rsp_ctr_next); + $display("%t: AFU Snp Rsp: tag=%0h, rem=%0d", $time, vx_snp_rsp_tag, snp_rsp_ctr_next); `endif end end diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 00fabefd..507f0189 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -91,6 +91,10 @@ module VX_lsu_unit #( wire [1:0] rsp_sext; reg [`NUM_THREADS-1:0][31:0] rsp_data; +`DEBUG_BLOCK( + reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags; +) + reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask; wire [`DCORE_TAG_ID_BITS-1:0] req_tag, rsp_tag; @@ -113,7 +117,7 @@ module VX_lsu_unit #( ) lsu_cam ( .clk (clk), .reset (reset), - .write_addr (req_tag), + .write_addr (req_tag), .acquire_slot (lsuq_push), .read_addr (rsp_tag), .write_data ({req_wid, req_pc, req_rd, req_wb, req_offset, req_sext}), @@ -126,6 +130,7 @@ module VX_lsu_unit #( always @(posedge clk) begin if (lsuq_push) begin mem_rsp_mask[req_tag] <= req_tmask; + pending_tags[req_tag] <= dcache_req_if.tag; end if (lsuq_pop_part) begin mem_rsp_mask[rsp_tag] <= mem_rsp_mask_n; @@ -215,6 +220,13 @@ module VX_lsu_unit #( $display("%t: D$%0d rsp: valid=%b, 
wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h", $time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data); end + if (lsuq_full) begin + $write("%t: D$%0d queue-full:", $time, CORE_ID); + for (integer j = 0; j < `LSUQ_SIZE; j++) begin + $write(" tag%0d=%0h", j, pending_tags[j]); + end + $write("\n"); + end end `endif diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index ba2f7c9d..0b460f85 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -833,25 +833,25 @@ module VX_bank #( $display("%t: cache%0d:%0d pipeline-stall: msrq=%b, cwbq=%b, dwbq=%b, snpq=%b", $time, CACHE_ID, BANK_ID, msrq_push_stall, cwbq_push_stall, dwbq_push_stall, snpq_push_stall); end if (dfpq_pop) begin - $display("%t: cache%0d:%0d dram-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0); + $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0); end if (reqq_pop) begin - $display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, debug_wid_st0, debug_pc_st0); + $display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, debug_wid_st0, debug_pc_st0); end if (snrq_pop) begin - $display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0d, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0); + $display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0); end if (cwbq_push) begin - $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, 
`LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), cwbq_tag_st3, cwbq_data_st3, debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), cwbq_tag_st3, cwbq_tid_st3, cwbq_data_st3, debug_wid_st3, debug_pc_st3); end if (dwbq_push) begin if (dwbq_is_dwb_in) - $display("%t: cache%0d:%0d dram-wb: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3); else - $display("%t: cache%0d:%0d dram-fill: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), debug_wid_st3, debug_pc_st3); end if (snpq_push) begin - $display("%t: cache%0d:%0d snp-rsp: addr=%0h, tag=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), snpq_tag_st3); + $display("%t: cache%0d:%0d snp-rsp: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), snpq_tag_st3); end end `endif diff --git a/hw/rtl/cache/VX_bank_core_req_arb.v b/hw/rtl/cache/VX_bank_core_req_arb.v index f806a04e..b398e0f6 100644 --- a/hw/rtl/cache/VX_bank_core_req_arb.v +++ b/hw/rtl/cache/VX_bank_core_req_arb.v @@ -15,125 +15,168 @@ module VX_bank_core_req_arb #( input wire clk, input wire reset, - // Enqueue Data + // Enqueue input wire push, input wire [NUM_REQUESTS-1:0] valids_in, + input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in, + input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in, input wire 
[`CORE_REQ_TAG_COUNT-1:0] rw_in, input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] byteen_in, input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] writedata_in, - input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in, - input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in, - // Dequeue Data - input wire pop, - output wire [`REQS_BITS-1:0] tid_out, - output wire rw_out, - output wire [WORD_SIZE-1:0] byteen_out, - output wire [`WORD_ADDR_WIDTH-1:0] addr_out, - output wire [`WORD_WIDTH-1:0] writedata_out, - output wire [CORE_TAG_WIDTH-1:0] tag_out, + // Dequeue + input wire pop, + output wire [CORE_TAG_WIDTH-1:0] tag_out, + output wire [`WORD_ADDR_WIDTH-1:0] addr_out, + output wire rw_out, + output wire [WORD_SIZE-1:0] byteen_out, + output wire [`WORD_WIDTH-1:0] writedata_out, + output wire [`REQS_BITS-1:0] tid_out, - // State Data - output wire empty, - output wire full + // States + output wire empty, + output wire full ); - wire [NUM_REQUESTS-1:0] out_per_valids; - wire [`CORE_REQ_TAG_COUNT-1:0] out_per_rw; - wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] out_per_byteen; - wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] out_per_addr; - wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] out_per_writedata; - wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] out_per_tag; - - reg [NUM_REQUESTS-1:0] use_per_valids; - reg [`CORE_REQ_TAG_COUNT-1:0] use_per_rw; - reg [NUM_REQUESTS-1:0][WORD_SIZE-1:0] use_per_byteen; - reg [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] use_per_addr; - reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] use_per_writedata; - reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] use_per_tag; - - wire [NUM_REQUESTS-1:0] qual_valids; - wire [`CORE_REQ_TAG_COUNT-1:0] qual_rw; - wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] qual_byteen; - wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] qual_addr; - wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] qual_writedata; - wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] qual_tag; - - wire o_empty; - - wire use_empty = !(| use_per_valids); - wire 
out_empty = !(| out_per_valids) || o_empty; - - wire push_qual = push && !full; - wire pop_qual = !out_empty && use_empty; + wire [NUM_REQUESTS-1:0] q_valids; + wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] q_tag; + wire [`CORE_REQ_TAG_COUNT-1:0] q_rw; + wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] q_byteen; + wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] q_addr; + wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] q_writedata; + wire q_push; + wire q_pop; + wire q_empty; + wire q_full; + + always @(*) begin + assert(!push || (| valids_in)); + assert(!push || !full); + assert(!pop || !empty); + end VX_generic_queue #( - .DATAW($bits(valids_in) + $bits(addr_in) + $bits(writedata_in) + $bits(tag_in) + $bits(rw_in) + $bits(byteen_in)), + .DATAW($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(writedata_in)), .SIZE(CREQ_SIZE) - ) reqq_queue ( + ) req_queue ( .clk (clk), .reset (reset), - .push (push_qual), - .data_in ({valids_in, rw_in, byteen_in, addr_in, writedata_in, tag_in}), - .pop (pop_qual), - .data_out ({out_per_valids, out_per_rw, out_per_byteen, out_per_addr, out_per_writedata, out_per_tag}), - .empty (o_empty), - .full (full), + .push (q_push), + .pop (q_pop), + .data_in ({valids_in, tag_in, addr_in, rw_in, byteen_in, writedata_in}), + .data_out ({q_valids, q_tag, q_addr, q_rw, q_byteen, q_writedata}), + .empty (q_empty), + .full (q_full), `UNUSED_PIN (size) ); - wire[NUM_REQUESTS-1:0] real_out_per_valids = out_per_valids & {NUM_REQUESTS{~out_empty}}; + if (NUM_REQUESTS > 1) begin - assign qual_valids = use_per_valids; - assign qual_addr = use_per_addr; - assign qual_writedata = use_per_writedata; - assign qual_tag = use_per_tag; - assign qual_rw = use_per_rw; - assign qual_byteen = use_per_byteen; + reg [CORE_TAG_WIDTH-1:0] sel_tag; + reg [`REQS_BITS-1:0] sel_tid; + reg [`WORD_ADDR_WIDTH-1:0] sel_addr; + reg sel_rw; + reg [WORD_SIZE-1:0] sel_byteen; + reg [`WORD_WIDTH-1:0] sel_writedata; + + reg [$clog2(NUM_REQUESTS+1)-1:0] 
q_valids_cnt_r; + wire [$clog2(NUM_REQUESTS+1)-1:0] q_valids_cnt; + reg [NUM_REQUESTS-1:0] pop_mask; + reg fast_track; - wire sel_valid; - wire[`REQS_BITS-1:0] sel_idx; - - VX_fixed_arbiter #( - .N(NUM_REQUESTS) - ) sel_bank ( - .clk (clk), - .reset (reset), - .requests (qual_valids), - .grant_valid (sel_valid), - .grant_index (sel_idx), - `UNUSED_PIN (grant_onehot) - ); + assign q_push = push; + assign q_pop = pop && (q_valids_cnt_r == 1 || q_valids_cnt_r == 2) && !fast_track; - assign empty = !sel_valid; - assign tid_out = sel_idx; - assign byteen_out = qual_byteen[sel_idx]; - assign addr_out = qual_addr[sel_idx]; - assign writedata_out = qual_writedata[sel_idx]; - - if (CORE_TAG_ID_BITS != 0) begin - assign tag_out = qual_tag; - assign rw_out = qual_rw; - end else begin - assign tag_out = qual_tag[sel_idx]; - assign rw_out = qual_rw[sel_idx]; - end + wire [`REQS_BITS-1:0] sel_idx; + + VX_fixed_arbiter #( + .N(NUM_REQUESTS) + ) sel_arb ( + .clk (clk), + .reset (reset), + .requests (q_valids & ~pop_mask), + `UNUSED_PIN (grant_valid), + .grant_index (sel_idx), + `UNUSED_PIN (grant_onehot) + ); - always @(posedge clk) begin - if (reset) begin - use_per_valids <= 0; - end else begin - if (pop_qual) begin - use_per_valids <= real_out_per_valids; - use_per_rw <= out_per_rw; - use_per_byteen <= out_per_byteen; - use_per_addr <= out_per_addr; - use_per_writedata <= out_per_writedata; - use_per_tag <= out_per_tag; - end else if (pop) begin - use_per_valids[sel_idx] <= 0; + VX_countones #( + .N(NUM_REQUESTS) + ) counter ( + .valids (q_valids), + .count (q_valids_cnt) + ); + + always @(posedge clk) begin + if (reset) begin + pop_mask <= 0; + fast_track <= 0; + q_valids_cnt_r <= 0; + end else begin + if (!q_empty + && ((0 == q_valids_cnt_r) || (pop && fast_track))) begin + q_valids_cnt_r <= q_valids_cnt; + pop_mask <= (NUM_REQUESTS'(1) << sel_idx); + fast_track <= 0; + end else if (pop) begin + q_valids_cnt_r <= q_valids_cnt_r - 1; + fast_track <= (q_valids_cnt_r == 2); + if 
(q_valids_cnt_r == 1 || q_valids_cnt_r == 2) begin + pop_mask <= 0; + end else begin + pop_mask[sel_idx] <= 1; + end + end + if ((0 == q_valids_cnt_r) || pop) begin + sel_tid <= sel_idx; + sel_byteen <= q_byteen[sel_idx]; + sel_addr <= q_addr[sel_idx]; + sel_writedata <= q_writedata[sel_idx]; + end end end - end + + if (CORE_TAG_ID_BITS != 0) begin + always @(posedge clk) begin + if ((0 == q_valids_cnt_r) || pop) begin + sel_tag <= q_tag; + sel_rw <= q_rw; + end + end + end else begin + always @(posedge clk) begin + if ((0 == q_valids_cnt_r) || pop) begin + sel_tag <= q_tag[sel_idx]; + sel_rw <= q_rw[sel_idx]; + end + end + end + + assign tag_out = sel_tag; + assign addr_out = sel_addr; + assign rw_out = sel_rw; + assign byteen_out = sel_byteen; + assign writedata_out = sel_writedata; + assign tid_out = sel_tid; + + assign empty = (0 == q_valids_cnt_r); + assign full = q_full; + + end else begin + `UNUSED_VAR (q_valids) + + assign q_push = push; + assign q_pop = pop; + + assign tag_out = q_tag; + assign addr_out = q_addr; + assign rw_out = q_rw; + assign byteen_out = q_byteen; + assign writedata_out = q_writedata; + assign tid_out = 0; + + assign empty = q_empty; + assign full = q_full; + end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 01cff2e8..73d958fc 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -178,15 +178,15 @@ module VX_cache_miss_resrv #( `ifdef DBG_PRINT_CACHE_MSRQ always @(posedge clk) begin - if (enqueue_st3 || schedule_st0 || dequeue_st3) begin + if (schedule_st0 || enqueue_st3 || dequeue_st3) begin + if (schedule_st0) + $display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); if (enqueue_st3) begin if (enqueue_msrq_st3) $display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, 
CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3); else $display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3, debug_wid_st3, debug_pc_st3); end - if (schedule_st0) - $display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); if (dequeue_st3) $display("%t: cache%0d:%0d msrq-deq addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, debug_wid_st3, debug_pc_st3); $write("%t: cache%0d:%0d msrq-table", $time, CACHE_ID, BANK_ID); @@ -197,11 +197,6 @@ module VX_cache_miss_resrv #( if (~ready_table[j]) $write("!"); $write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID)); end - else if (schedule_ptr == $bits(schedule_ptr)'(j)) begin - $write(" *"); - if (~ready_table[j]) $write("!"); - $write("[addr%0d=%0h]", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID)); - end end $write("\n"); end diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 26367354..8c3f23c8 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -109,9 +109,6 @@ module VX_snp_forwarder #( .grant_onehot (sel_1hot) ); - assign fwdin_valid = snp_fwdin_valid[sel_idx]; - assign fwdin_tag = snp_fwdin_tag[sel_idx]; - wire stall = fwdin_valid && ~fwdin_ready; VX_generic_register #( diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index ffa49e82..895ea1da 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -222,7 +222,7 @@ void Simulator::eval_snp_bus() { --snp_req_size_; ++pending_snp_reqs_; #ifdef DBG_PRINT_CACHE_SNP - std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl; + 
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << std::dec << " remain=" << snp_req_size_ << std::endl; /* std::hex is sticky: addr and tag print in hex, then std::dec restores base 10 for remain= and for later std::cout output */ #endif } else { vortex_->snp_req_valid = 0; @@ -296,7 +296,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { snp_req_active_ = true; #ifdef DBG_PRINT_CACHE_SNP - std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl; + std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << std::dec << " remain=" << snp_req_size_ << std::endl; /* std::hex is sticky: addr and tag print in hex, then std::dec restores base 10 for remain= and for later std::cout output */ #endif }