diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 57aa0845..f3c1b774 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -37,7 +37,9 @@ module VX_bank #( parameter CORE_TAG_ID_BITS = 0, // bank offset from beginning of index range - parameter BANK_ADDR_OFFSET = 0 + parameter BANK_ADDR_OFFSET = 0, + + localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE) ) ( `SCOPE_IO_VX_bank @@ -76,12 +78,14 @@ module VX_bank #( output wire mem_req_rw, output wire [CACHE_LINE_SIZE-1:0] mem_req_byteen, output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr, + output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, output wire [`CACHE_LINE_WIDTH-1:0] mem_req_data, input wire mem_req_ready, // Memory response input wire mem_rsp_valid, input wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr, + input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id, input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data, output wire mem_rsp_ready, @@ -108,7 +112,7 @@ module VX_bank #( wire [`LINE_ADDR_WIDTH-1:0] creq_addr; wire [CORE_TAG_WIDTH-1:0] creq_tag; - wire creq_out_valid, creq_out_ready; + wire creq_valid, creq_ready; VX_elastic_buffer #( .DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), @@ -121,13 +125,14 @@ module VX_bank #( .valid_in (core_req_valid), .data_in ({core_req_tag, core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid}), .data_out ({creq_tag, creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid}), - .ready_out (creq_out_ready), - .valid_out (creq_out_valid) + .ready_out (creq_ready), + .valid_out (creq_valid) ); + wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id; wire mshr_alm_full; - wire mshr_pop; wire mshr_valid; + wire [MSHR_ADDR_WIDTH-1:0] mshr_dequeue_id; wire [`LINE_ADDR_WIDTH-1:0] mshr_addr; wire [CORE_TAG_WIDTH-1:0] mshr_tag; wire [NUM_PORTS-1:0] mshr_pmask; @@ -141,60 +146,43 @@ module VX_bank #( wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, 
req_tid_st1; wire [NUM_PORTS-1:0] pmask_st0, pmask_st1; wire [`CACHE_LINE_WIDTH-1:0] rdata_st1; - wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1; + wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1; + wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; wire [CORE_TAG_WIDTH-1:0] tag_st0, tag_st1; wire valid_st0, valid_st1; wire is_fill_st0, is_fill_st1; - wire is_mshr_st0, is_mshr_st1; - wire miss_st0, miss_st1; - wire prev_miss_dep_st0; - wire force_miss_st0, force_miss_st1; - wire not_same_prev_mshr_st0, not_same_prev_mshr_st1; - wire writeen_unqual_st0, writeen_unqual_st1; - wire incoming_fill_unqual_st0, incoming_fill_unqual_st1; - wire mshr_pending_st0; + wire is_mshr_st0, is_mshr_st1; + wire miss_st0, miss_st1; + wire writeen_unqual_st1; wire is_flush_st0; + wire mshr_pending_st0, mshr_pending_st1; - wire crsq_in_valid, crsq_in_ready, crsq_in_stall; + wire crsq_valid, crsq_ready, crsq_stall; wire mreq_alm_full; - wire creq_out_fire = creq_out_valid && creq_out_ready; - wire crsq_in_fire = crsq_in_valid && crsq_in_ready; + wire creq_fire = creq_valid && creq_ready; - VX_pending_size #( - .SIZE (MSHR_SIZE) - ) mshr_pending_size ( - .clk (clk), - .reset (reset), - .push (creq_out_fire && !creq_rw), - .pop (crsq_in_fire), - .full (mshr_alm_full), - `UNUSED_PIN (empty), - `UNUSED_PIN (size) - ); - // determine which queue to pop next in priority order - wire mshr_grant = !mreq_alm_full; // ensure memory request queue not full (deadlock prevention) + wire mshr_grant = 1; wire mshr_enable = mshr_grant && mshr_valid; wire mrsq_grant = !mshr_enable; wire mrsq_enable = mrsq_grant && mem_rsp_valid; wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable; - - wire is_miss_st1 = (miss_st1 || force_miss_st1); - assign mshr_pop = mshr_enable - && !(valid_st1 && is_mshr_st1 && is_miss_st1) // do not schedule another mshr request if the previous one missed - && !crsq_in_stall; // ensure core response ready - - assign creq_out_ready = creq_grant - && !mreq_alm_full 
// ensure memory request ready - && !mshr_alm_full // ensure mshr enqueue ready - && !crsq_in_stall; // ensure core response ready + wire mshr_ready = mshr_grant + && !crsq_stall; // ensure core response ready assign mem_rsp_ready = mrsq_grant - && !crsq_in_stall; // ensure core response ready + && !crsq_stall; // ensure core response ready + + assign creq_ready = creq_grant + && !mreq_alm_full // ensure memory request ready + && !mshr_alm_full // ensure mshr enqueue ready + && !crsq_stall; // ensure core response ready + + wire mshr_fire = mshr_valid && mshr_ready; wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; @@ -228,17 +216,17 @@ module VX_bank #( end VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + MSHR_ADDR_WIDTH), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), - .enable (!crsq_in_stall), + .enable (!crsq_stall), .data_in ({ - flush_enable || mshr_pop || mem_rsp_fire || creq_out_fire, + flush_enable || mshr_fire || mem_rsp_fire || creq_fire, flush_enable, - mshr_enable, mrsq_enable || flush_enable, + mshr_enable, mshr_enable ? 1'b0 : creq_rw, mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)), (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data : creq_line_data, @@ -246,9 +234,10 @@ module VX_bank #( creq_byteen, mshr_enable ? mshr_tid : creq_tid, mshr_enable ? mshr_pmask : creq_pmask, - mshr_enable ? mshr_tag : creq_tag + mshr_enable ? mshr_tag : creq_tag, + mshr_enable ? mshr_dequeue_id : (mem_rsp_valid ? 
mem_rsp_id : mshr_alloc_id) }), - .data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}) + .data_out ({valid_st0, is_flush_st0, is_fill_st0, is_mshr_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0}) ); `ifdef DBG_CACHE_REQ_INFO @@ -260,7 +249,7 @@ module VX_bank #( `endif wire do_lookup_st0 = valid_st0 && ~is_fill_st0; - wire do_fill_st0 = valid_st0 && is_fill_st0 && !crsq_in_stall; + wire do_fill_st0 = valid_st0 && is_fill_st0; wire tag_match_st0; @@ -279,7 +268,8 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO .debug_pc (debug_pc_st0), .debug_wid (debug_wid_st0), - `endif + `endif + .stall (crsq_stall), // read/Fill .lookup (do_lookup_st0), @@ -289,35 +279,18 @@ module VX_bank #( .tag_match (tag_match_st0) ); - // we had a miss with prior request for the current address - assign prev_miss_dep_st0 = valid_st1 && is_miss_st1 && (addr_st0 == addr_st1); - // we have a core request hit assign miss_st0 = !is_fill_st0 && !tag_match_st0; - // force a miss to ensure commit order when a new request has pending previous requests to same block - // also force a miss for mshr requests when previous request was a missed - assign force_miss_st0 = (!is_fill_st0 && !is_mshr_st0 && (mshr_pending_st0 || prev_miss_dep_st0)) - || (is_mshr_st0 && valid_st1 && is_mshr_st1 && is_miss_st1); - - // previous mshr request doesn't have same address - assign not_same_prev_mshr_st0 = valid_st1 && is_mshr_st1 && (addr_st1 != addr_st0); - - // enable write when we have a fill request that is not redundant - assign writeen_unqual_st0 = is_fill_st0 && !tag_match_st0; - - // check if incoming memory response match current address - assign incoming_fill_unqual_st0 = mem_rsp_valid && (addr_st0 == mem_rsp_addr); - VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + 
WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + MSHR_ADDR_WIDTH + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), - .enable (!crsq_in_stall), - .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, incoming_fill_unqual_st0, miss_st0, force_miss_st0, mem_rw_st0, not_same_prev_mshr_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}), - .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, incoming_fill_unqual_st1, miss_st1, force_miss_st1, mem_rw_st1, not_same_prev_mshr_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1}) + .enable (!crsq_stall), + .data_in ({valid_st0, is_fill_st0, is_mshr_st0, is_fill_st0, miss_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_fill_st1, is_mshr_st1, writeen_unqual_st1, miss_st1, mem_rw_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1}) ); `ifdef DBG_CACHE_REQ_INFO @@ -328,21 +301,16 @@ module VX_bank #( end `endif - wire writeen_st1 = (WRITE_ENABLE && !is_fill_st1 && mem_rw_st1 && ~is_miss_st1) + wire writeen_st1 = (WRITE_ENABLE && !is_fill_st1 && mem_rw_st1 && !miss_st1) || writeen_unqual_st1; wire readen_st1 = !is_fill_st1 && !mem_rw_st1; - wire crsq_push_st1 = readen_st1 && ~is_miss_st1; - - wire mshr_push_st1 = readen_st1 && is_miss_st1; - - wire incoming_fill_st1 = (mem_rsp_valid && (addr_st1 == mem_rsp_addr)) - || incoming_fill_unqual_st1; + wire crsq_push_st1 = readen_st1 && !miss_st1; wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1; - wire mreq_push_st1 = (readen_st1 && miss_st1 && (~force_miss_st1 || not_same_prev_mshr_st1) && !incoming_fill_st1) + wire mreq_push_st1 = (readen_st1 && miss_st1 && 
!mshr_pending_st1) || do_writeback_st1; wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] line_byteen_st1; @@ -360,6 +328,7 @@ module VX_bank #( assign line_byteen_st1 = line_byteen_r; end else begin assign line_byteen_st1 = byteen_st1; + `UNUSED_VAR (wsel_st1) end VX_data_access #( @@ -379,6 +348,8 @@ module VX_bank #( .debug_wid (debug_wid_st1), `endif + .stall (crsq_stall), + .addr (addr_st1), // reading @@ -391,14 +362,13 @@ module VX_bank #( .byteen (line_byteen_st1), .wdata (wdata_st1) ); + + wire mshr_allocate = creq_fire && ~creq_rw; + wire mshr_replay = do_fill_st0 && ~crsq_stall; + wire mshr_lookup = valid_st0 && !is_fill_st0 && ~is_mshr_st0 && ~mem_rw_st0 && ~crsq_stall; + wire mshr_release = valid_st1 && readen_st1 && ~is_mshr_st1 && ~miss_st1 && ~crsq_stall; - wire mshr_push = valid_st1 && mshr_push_st1; - wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1 && crsq_in_ready; - wire mshr_restore = is_mshr_st1; - - // push a missed request as 'ready' if it was a forced miss that actually had a hit - // or the fill request for this block is comming - wire mshr_init_ready_state = !miss_st1 || incoming_fill_unqual_st1; + wire mshr_not_full; VX_miss_resrv #( .BANK_ID (BANK_ID), @@ -418,38 +388,44 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO .deq_debug_pc (debug_pc_sel), .deq_debug_wid (debug_wid_sel), - .enq_debug_pc (debug_pc_st1), - .enq_debug_wid (debug_wid_st1), + .lkp_debug_pc (debug_pc_st0), + .lkp_debug_wid (debug_wid_st0), + .rel_debug_pc (debug_pc_st1), + .rel_debug_wid (debug_wid_st1), `endif - // enqueue - .enqueue (mshr_push), - .enqueue_addr (addr_st1), - .enqueue_data ({wsel_st1, tag_st1, req_tid_st1, pmask_st1}), - .enqueue_is_mshr (mshr_restore), - .enqueue_as_ready (mshr_init_ready_state), - `UNUSED_PIN (enqueue_almfull), - `UNUSED_PIN (enqueue_full), - - // fill - .fill_start (mem_rsp_fire), - .fill_addr (mem_rsp_addr), + // allocate + .allocate_valid (mshr_allocate), + .allocate_addr (creq_addr), + .allocate_data ({creq_wsel, 
creq_tag, creq_tid, creq_pmask}), + .allocate_id (mshr_alloc_id), + .allocate_ready (mshr_not_full), // lookup + .lookup_valid (mshr_lookup), + .lookup_replay (mshr_replay), + .lookup_id (mshr_id_st0), .lookup_addr (addr_st0), .lookup_match (mshr_pending_st0), - .lookup_fill (do_fill_st0), - // schedule - .schedule (mshr_pop), - .schedule_valid (mshr_valid), - .schedule_addr (mshr_addr), - .schedule_data ({mshr_wsel, mshr_tag, mshr_tid, mshr_pmask}), + // fill + .fill_valid (mem_rsp_fire), + .fill_id (mem_rsp_id), // dequeue - .dequeue (mshr_dequeue) + .dequeue_valid (mshr_valid), + .dequeue_id (mshr_dequeue_id), + .dequeue_addr (mshr_addr), + .dequeue_data ({mshr_wsel, mshr_tag, mshr_tid, mshr_pmask}), + .dequeue_ready (mshr_ready), + + // release + .release_valid (mshr_release), + .release_id (mshr_id_st1) ); + assign mshr_alm_full = ~mshr_not_full; + // Enqueue core response wire [NUM_PORTS-1:0] crsq_pmask; @@ -457,8 +433,8 @@ module VX_bank #( wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid; wire [CORE_TAG_WIDTH-1:0] crsq_tag; - assign crsq_in_valid = valid_st1 && crsq_push_st1; - assign crsq_in_stall = crsq_in_valid && !crsq_in_ready; + assign crsq_valid = valid_st1 && crsq_push_st1; + assign crsq_stall = crsq_valid && !crsq_ready; assign crsq_pmask = pmask_st1; assign crsq_tid = req_tid_st1; @@ -479,9 +455,9 @@ module VX_bank #( ) core_rsp_req ( .clk (clk), .reset (reset), - .valid_in (crsq_in_valid), + .valid_in (crsq_valid), .data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}), - .ready_in (crsq_in_ready), + .ready_in (crsq_ready), .valid_out (core_rsp_valid), .data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}), .ready_out (core_rsp_ready) @@ -491,6 +467,7 @@ module VX_bank #( wire [CACHE_LINE_SIZE-1:0] mreq_byteen; wire [`LINE_ADDR_WIDTH-1:0] mreq_addr; + wire [MSHR_ADDR_WIDTH-1:0] mreq_id; wire [`CACHE_LINE_WIDTH-1:0] mreq_data; wire mreq_push, mreq_pop, mreq_empty, mreq_rw; @@ -501,10 +478,11 @@ module VX_bank #( assign mreq_rw = 
WRITE_ENABLE && do_writeback_st1; assign mreq_byteen = mreq_rw ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}}; assign mreq_addr = addr_st1; + assign mreq_id = mshr_id_st1; assign mreq_data = wdata_st1; VX_fifo_queue #( - .DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH), + .DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + `CACHE_LINE_WIDTH), .SIZE (MREQ_SIZE), .ALM_FULL (MREQ_SIZE-2) ) mem_req_queue ( @@ -512,8 +490,8 @@ module VX_bank #( .reset (reset), .push (mreq_push), .pop (mreq_pop), - .data_in ({mreq_rw, mreq_byteen, mreq_addr, mreq_data}), - .data_out ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data}), + .data_in ({mreq_rw, mreq_byteen, mreq_addr, mreq_id, mreq_data}), + .data_out ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_id, mem_req_data}), .empty (mreq_empty), .alm_full (mreq_alm_full), `UNUSED_PIN (full), @@ -527,10 +505,8 @@ module VX_bank #( `SCOPE_ASSIGN (valid_st1, valid_st1); `SCOPE_ASSIGN (is_fill_st0, is_fill_st0); `SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0); - `SCOPE_ASSIGN (miss_st0, miss_st0); - `SCOPE_ASSIGN (force_miss_st0, force_miss_st0); - `SCOPE_ASSIGN (mshr_push, mshr_push); - `SCOPE_ASSIGN (crsq_in_stall, crsq_in_stall); + `SCOPE_ASSIGN (miss_st0, miss_st0); + `SCOPE_ASSIGN (crsq_stall, crsq_stall); `SCOPE_ASSIGN (mreq_alm_full, mreq_alm_full); `SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full); `SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID)); @@ -539,45 +515,43 @@ module VX_bank #( `ifdef PERF_ENABLE assign perf_read_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && !mem_rw_st1; assign perf_write_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && mem_rw_st1; - assign perf_pipe_stalls = crsq_in_stall || mreq_alm_full || mshr_alm_full; + assign perf_pipe_stalls = crsq_stall || mreq_alm_full || mshr_alm_full; assign perf_mshr_stalls = mshr_alm_full; `endif `ifdef DBG_PRINT_CACHE_BANK - always @(posedge clk) begin - /*if (crsq_in_fire && (NUM_PORTS > 1) 
&& $countones(crsq_pmask) > 1) begin - $display("%t: *** cache%0d:%0d multi-port-out: pmask=%b, addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, crsq_pmask, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag); - end*/ - if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_st1) begin - $display("%t: *** cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); - assert(!is_mshr_st1); - end - if (crsq_in_stall || mreq_alm_full || mshr_alm_full) begin - $display("%t: *** cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_in_stall, mreq_alm_full, mshr_alm_full); + wire crsq_fire = crsq_valid && crsq_ready; + + wire pipeline_stall = (mshr_valid || mem_rsp_valid || creq_valid) + && ~(mshr_fire || mem_rsp_fire || creq_fire); + + always @(posedge clk) begin + if (pipeline_stall) begin + $display("%t: *** cache%0d:%0d stall: crsq=%b, mreq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_stall, mreq_alm_full, mshr_alm_full); end if (flush_enable) begin $display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID)); end if (mem_rsp_fire) begin - $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_data); + $display("%t: cache%0d:%0d fill-rsp: addr=%0h, id=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data); end - if (mshr_pop) begin + if (mshr_fire) begin $display("%t: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, debug_wid_sel, debug_pc_sel); end - if (creq_out_fire) begin + if (creq_fire) begin if (creq_rw) $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, 
`LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel); else $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel); end - if (crsq_in_fire) begin + if (crsq_fire) begin $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1); end if (mreq_push) begin if (do_writeback_st1) $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1); else - $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), debug_wid_st1, debug_pc_st1); + $display("%t: cache%0d:%0d fill-req: addr=%0h, id=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, debug_wid_st1, debug_pc_st1); end end `endif diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 9637e0b6..bbe1e10a 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -91,6 +91,8 @@ module VX_cache #( `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) `STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value")) + localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE); + localparam MEM_TAG_IN_WIDTH = `MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH; localparam CORE_TAG_X_WIDTH = CORE_TAG_WIDTH - NC_ENABLE; localparam CORE_TAG_ID_X_BITS = (CORE_TAG_ID_BITS != 0) ? 
(CORE_TAG_ID_BITS - NC_ENABLE) : CORE_TAG_ID_BITS; @@ -125,13 +127,13 @@ module VX_cache #( wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_nc; wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc; wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_nc; - wire [`MEM_ADDR_WIDTH-1:0] mem_req_tag_nc; + wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_nc; wire mem_req_ready_nc; // Memory response wire mem_rsp_valid_nc; wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_nc; - wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_nc; + wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_nc; wire mem_rsp_ready_nc; if (NC_ENABLE) begin @@ -146,7 +148,7 @@ module VX_cache #( .MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH), .MEM_DATA_SIZE (CACHE_LINE_SIZE), - .MEM_TAG_IN_WIDTH (`MEM_ADDR_WIDTH), + .MEM_TAG_IN_WIDTH (MEM_TAG_IN_WIDTH), .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH) ) nc_bypass ( .clk (clk), @@ -246,12 +248,12 @@ module VX_cache #( /////////////////////////////////////////////////////////////////////////// wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual; - wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_qual; + wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_qual; wire mrsq_out_valid, mrsq_out_ready; VX_elastic_buffer #( - .DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH), + .DATAW (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH), .SIZE (MRSQ_SIZE), .OUTPUT_REG (MRSQ_SIZE > 2) ) mem_rsp_queue ( @@ -307,6 +309,7 @@ module VX_cache #( wire [NUM_BANKS-1:0] per_bank_mem_req_rw; wire [NUM_BANKS-1:0][CACHE_LINE_SIZE-1:0] per_bank_mem_req_byteen; wire [NUM_BANKS-1:0][`MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr; + wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; wire [NUM_BANKS-1:0][`CACHE_LINE_WIDTH-1:0] per_bank_mem_req_data; wire [NUM_BANKS-1:0] per_bank_mem_req_ready; @@ -316,7 +319,7 @@ module VX_cache #( `UNUSED_VAR (mem_rsp_tag_qual) assign mrsq_out_ready = per_bank_mem_rsp_ready; end else begin - assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)]; + assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual)]; end 
VX_core_req_bank_sel #( @@ -378,11 +381,13 @@ module VX_cache #( wire curr_bank_mem_req_rw; wire [CACHE_LINE_SIZE-1:0] curr_bank_mem_req_byteen; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; + wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_req_id; wire[`CACHE_LINE_WIDTH-1:0] curr_bank_mem_req_data; wire curr_bank_mem_req_ready; wire curr_bank_mem_rsp_valid; - wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_rsp_addr; + wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_rsp_addr; + wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_rsp_id; wire [`CACHE_LINE_WIDTH-1:0] curr_bank_mem_rsp_data; wire curr_bank_mem_rsp_ready; @@ -407,25 +412,27 @@ module VX_cache #( assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data; // Memory request - assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid; - assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw; + assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid; + assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw; assign per_bank_mem_req_byteen[i] = curr_bank_mem_req_byteen; if (NUM_BANKS == 1) begin assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr; end else begin assign per_bank_mem_req_addr[i] = `LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i); end + assign per_bank_mem_req_id[i] = curr_bank_mem_req_id; assign per_bank_mem_req_data[i] = curr_bank_mem_req_data; - assign curr_bank_mem_req_ready = per_bank_mem_req_ready[i]; + assign curr_bank_mem_req_ready = per_bank_mem_req_ready[i]; // Memory response if (NUM_BANKS == 1) begin assign curr_bank_mem_rsp_valid = mrsq_out_valid; - assign curr_bank_mem_rsp_addr = mem_rsp_tag_qual; + assign curr_bank_mem_rsp_addr = `MEM_TAG_TO_LINE_ADDR(mem_rsp_tag_qual); end else begin - assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_ADDR_BANK(mem_rsp_tag_qual) == i); - assign curr_bank_mem_rsp_addr = `MEM_TO_LINE_ADDR(mem_rsp_tag_qual); + assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual) == i); + assign curr_bank_mem_rsp_addr = 
`MEM_TAG_TO_LINE_ADDR(mem_rsp_tag_qual); end + assign curr_bank_mem_rsp_id = `MEM_TAG_TO_REQ_ID(mem_rsp_tag_qual); assign curr_bank_mem_rsp_data = mem_rsp_data_qual; assign per_bank_mem_rsp_ready[i] = curr_bank_mem_rsp_ready; @@ -484,12 +491,14 @@ module VX_cache #( .mem_req_rw (curr_bank_mem_req_rw), .mem_req_byteen (curr_bank_mem_req_byteen), .mem_req_addr (curr_bank_mem_req_addr), + .mem_req_id (curr_bank_mem_req_id), .mem_req_data (curr_bank_mem_req_data), .mem_req_ready (curr_bank_mem_req_ready), // Memory response .mem_rsp_valid (curr_bank_mem_rsp_valid), .mem_rsp_addr (curr_bank_mem_rsp_addr), + .mem_rsp_id (curr_bank_mem_rsp_id), .mem_rsp_data (curr_bank_mem_rsp_data), .mem_rsp_ready (curr_bank_mem_rsp_ready), @@ -523,14 +532,16 @@ module VX_cache #( .core_rsp_ready (core_rsp_ready_nc) ); - wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in; + wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; i++) begin - assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i]}; + assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i]}; end + wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; + VX_stream_arbiter #( .NUM_REQS (NUM_BANKS), - .DATAW (`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH), + .DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH), .BUFFERED (1) ) mem_req_arb ( .clk (clk), @@ -539,11 +550,11 @@ module VX_cache #( .data_in (data_in), .ready_in (per_bank_mem_req_ready), .valid_out (mem_req_valid_nc), - .data_out ({mem_req_addr_nc, mem_req_rw_nc, mem_req_byteen_nc, mem_req_data_nc}), + .data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_byteen_nc, mem_req_data_nc}), .ready_out (mem_req_ready_nc) ); - assign mem_req_tag_nc = 
mem_req_addr_nc; + assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({mem_req_addr_nc, mem_req_id}); `ifdef PERF_ENABLE // per cycle: core_reads, core_writes diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index 5ca9e80d..52f4f06a 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -59,12 +59,14 @@ `define BANK_READY_COUNT ((SHARED_BANK_READY != 0) ? 1 : NUM_BANKS) -`define MEM_ADDR_BANK(x) x[`BANK_SELECT_BITS+BANK_ADDR_OFFSET-1 : BANK_ADDR_OFFSET] - -`define MEM_TO_LINE_ADDR(x) x[`MEM_ADDR_WIDTH-1 : `BANK_SELECT_BITS] - `define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)} +`define MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0] + +`define MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `BANK_SELECT_BITS] + +`define MEM_TAG_TO_LINE_ADDR(x) x[(MSHR_ADDR_WIDTH+`BANK_SELECT_BITS) +: `LINE_ADDR_WIDTH] + `define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))} `define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)} diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index f58c14c5..13179f9c 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -24,6 +24,8 @@ module VX_data_access #( `IGNORE_UNUSED_END `endif + input wire stall, + `IGNORE_UNUSED_BEGIN input wire[`LINE_ADDR_WIDTH-1:0] addr, `IGNORE_UNUSED_END @@ -75,16 +77,18 @@ module VX_data_access #( .dout(rdata) ); + `UNUSED_VAR (stall) + `ifdef DBG_PRINT_CACHE_DATA always @(posedge clk) begin - if (writeen) begin + if (writeen && ~stall) begin if (is_fill) begin $display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, wdata); end else begin $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wdata); end end - if (readen) begin + if (readen && ~stall) 
begin $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, rdata); end end diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 1d96f540..368accf0 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -19,7 +19,9 @@ module VX_miss_resrv #( parameter MSHR_SIZE = 1, parameter ALM_FULL = (MSHR_SIZE-1), // core request tag size - parameter CORE_TAG_WIDTH = 1 + parameter CORE_TAG_WIDTH = 1, + + localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE) ) ( input wire clk, input wire reset, @@ -28,159 +30,147 @@ module VX_miss_resrv #( `IGNORE_UNUSED_BEGIN input wire[31:0] deq_debug_pc, input wire[`NW_BITS-1:0] deq_debug_wid, - input wire[31:0] enq_debug_pc, - input wire[`NW_BITS-1:0] enq_debug_wid, + input wire[31:0] lkp_debug_pc, + input wire[`NW_BITS-1:0] lkp_debug_wid, + input wire[31:0] rel_debug_pc, + input wire[`NW_BITS-1:0] rel_debug_wid, `IGNORE_UNUSED_END `endif - // enqueue - input wire enqueue, - input wire [`LINE_ADDR_WIDTH-1:0] enqueue_addr, - input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data, - input wire enqueue_is_mshr, - input wire enqueue_as_ready, - output wire enqueue_full, - output wire enqueue_almfull, + // allocate + input wire allocate_valid, + input wire [`LINE_ADDR_WIDTH-1:0] allocate_addr, + input wire [`MSHR_DATA_WIDTH-1:0] allocate_data, + output wire [MSHR_ADDR_WIDTH-1:0] allocate_id, + output wire allocate_ready, // fill - input wire fill_start, - input wire [`LINE_ADDR_WIDTH-1:0] fill_addr, + input wire fill_valid, + input wire [MSHR_ADDR_WIDTH-1:0] fill_id, // lookup + input wire lookup_valid, + input wire lookup_replay, + input wire [MSHR_ADDR_WIDTH-1:0] lookup_id, input wire [`LINE_ADDR_WIDTH-1:0] lookup_addr, output wire lookup_match, - input wire lookup_fill, + + // dequeue + output wire dequeue_valid, + output wire [MSHR_ADDR_WIDTH-1:0] dequeue_id, + output wire 
[`LINE_ADDR_WIDTH-1:0] dequeue_addr, + output wire [`MSHR_DATA_WIDTH-1:0] dequeue_data, + input wire dequeue_ready, - // schedule - input wire schedule, - output wire schedule_valid, - output wire [`LINE_ADDR_WIDTH-1:0] schedule_addr, - output wire [`MSHR_DATA_WIDTH-1:0] schedule_data, - - // dequeue - input wire dequeue + // release + input wire release_valid, + input wire [MSHR_ADDR_WIDTH-1:0] release_id ); `UNUSED_PARAM (CACHE_ID) `UNUSED_PARAM (BANK_ID) - localparam ADDRW = $clog2(MSHR_SIZE); - - reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; + reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table, addr_table_n; reg [MSHR_SIZE-1:0] valid_table, valid_table_n; reg [MSHR_SIZE-1:0] ready_table, ready_table_n; - reg [ADDRW-1:0] head_ptr, head_ptr_n; - reg [ADDRW-1:0] tail_ptr, tail_ptr_n; - reg [ADDRW-1:0] restore_ptr, restore_ptr_n; - reg [ADDRW-1:0] schedule_ptr, schedule_ptr_n; - reg [ADDRW-1:0] used_r; - reg alm_full_r, full_r; - reg valid_out_r; + + reg allocate_rdy_r, allocate_rdy_n; + reg [MSHR_ADDR_WIDTH-1:0] allocate_id_r, allocate_id_n; + + reg dequeue_val_r, dequeue_val_n, dequeue_val_x; + reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n, dequeue_id_x; - wire [MSHR_SIZE-1:0] valid_address_match; - for (genvar i = 0; i < MSHR_SIZE; i++) begin - assign valid_address_match[i] = valid_table[i] && (addr_table[i] == lookup_addr); + reg [MSHR_SIZE-1:0] valid_table_x; + reg [MSHR_SIZE-1:0] ready_table_x; + + wire [MSHR_SIZE-1:0] addr_match; + + wire allocate_fire = allocate_valid && allocate_ready; + + wire dequeue_fire = dequeue_valid && dequeue_ready; + + for (genvar i = 0; i < MSHR_SIZE; ++i) begin + assign addr_match[i] = (i != lookup_id) && valid_table[i] && (addr_table[i] == lookup_addr); + end + + always @(*) begin + valid_table_x = valid_table; + ready_table_x = ready_table; + if (dequeue_fire) begin + valid_table_x[dequeue_id] = 0; + end + if (lookup_replay) begin + ready_table_x |= addr_match; + end end - wire push_new = enqueue && 
!enqueue_is_mshr; + VX_priority_encoder #( + .N (MSHR_SIZE) + ) dequeue_pe ( + .data_in (valid_table_x & ready_table_x), + .index (dequeue_id_x), + .valid_out (dequeue_val_x), + `UNUSED_PIN (onehot) + ); - wire restore = enqueue && enqueue_is_mshr; + VX_priority_encoder #( + .N (MSHR_SIZE) + ) allocate_pe ( + .data_in (~valid_table_n), + .index (allocate_id_n), + .valid_out (allocate_rdy_n), + `UNUSED_PIN (onehot) + ); always @(*) begin - valid_table_n = valid_table; - ready_table_n = ready_table; - head_ptr_n = head_ptr; - tail_ptr_n = tail_ptr; - schedule_ptr_n = schedule_ptr; - restore_ptr_n = restore_ptr; + valid_table_n = valid_table_x; + ready_table_n = ready_table_x; + addr_table_n = addr_table; + dequeue_val_n = dequeue_val_r; + dequeue_id_n = dequeue_id_r; - if (lookup_fill) begin - // unlock pending requests for scheduling - ready_table_n |= valid_address_match; + if (dequeue_fire) begin + dequeue_val_n = dequeue_val_x; + dequeue_id_n = dequeue_id_x; end - if (schedule) begin - // schedule next entry - schedule_ptr_n = schedule_ptr + 1; - valid_table_n[schedule_ptr] = 0; - ready_table_n[schedule_ptr] = 0; + if (allocate_fire) begin + valid_table_n[allocate_id] = 1; + ready_table_n[allocate_id] = 0; + addr_table_n[allocate_id] = allocate_addr; end - if (fill_start && (fill_addr == addr_table[schedule_ptr])) begin - ready_table_n[schedule_ptr] = valid_table[schedule_ptr]; + if (fill_valid) begin + dequeue_val_n = 1; + dequeue_id_n = fill_id; end - if (push_new) begin - // push new entry - valid_table_n[tail_ptr] = 1; - ready_table_n[tail_ptr] = enqueue_as_ready; - tail_ptr_n = tail_ptr + 1; - end else if (restore) begin - // restore schedule, returning missed mshr entry - valid_table_n[restore_ptr] = 1; - ready_table_n[restore_ptr] = enqueue_as_ready; - restore_ptr_n = restore_ptr + 1; - schedule_ptr_n = head_ptr; - end else if (dequeue) begin - // clear scheduled entry - head_ptr_n = head_ptr + 1; - restore_ptr_n = head_ptr_n; - end + if (release_valid) 
begin + valid_table_n[release_id] = 0; + end end always @(posedge clk) begin if (reset) begin - valid_table <= 0; - ready_table <= 0; - head_ptr <= 0; - tail_ptr <= 0; - schedule_ptr <= 0; - restore_ptr <= 0; - used_r <= 0; - alm_full_r <= 0; - full_r <= 0; - valid_out_r <= 0; + valid_table <= 0; + allocate_rdy_r <= 0; + dequeue_val_r <= 0; end else begin - if (schedule) begin - assert(schedule_valid); - assert(!fill_start); - assert(!restore); - end - - if (push_new) begin - assert(!full_r); - end else if (restore) begin - assert(!schedule); - end - - if (push_new) begin - if (!dequeue) begin - if (used_r == ADDRW'(ALM_FULL-1)) - alm_full_r <= 1; - if (used_r == ADDRW'(MSHR_SIZE-1)) - full_r <= 1; - end - end else if (dequeue) begin - if (used_r == ADDRW'(ALM_FULL)) - alm_full_r <= 0; - full_r <= 0; - end - - used_r <= used_r + ADDRW'($signed(2'(push_new) - 2'(dequeue))); - - valid_table <= valid_table_n; - ready_table <= ready_table_n; - head_ptr <= head_ptr_n; - tail_ptr <= tail_ptr_n; - schedule_ptr <= schedule_ptr_n; - restore_ptr <= restore_ptr_n; - valid_out_r <= ready_table_n[schedule_ptr_n]; + valid_table <= valid_table_n; + allocate_rdy_r <= allocate_rdy_n; + dequeue_val_r <= dequeue_val_n; end + ready_table <= ready_table_n; + addr_table <= addr_table_n; + dequeue_id_r <= dequeue_id_n; + allocate_id_r <= allocate_id_n; - if (push_new) begin - addr_table[tail_ptr] <= enqueue_addr; - end + assert(!allocate_fire || !valid_table[allocate_id_r]); + assert(!release_valid || valid_table[release_id]); end + `RUNTIME_ASSERT((!fill_valid || valid_table[fill_id]), ("%t: *** cache%0d:%0d invalid fill: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID, + `LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id)) + VX_dp_ram #( .DATAW (`MSHR_DATA_WIDTH), .SIZE (MSHR_SIZE), @@ -188,43 +178,56 @@ module VX_miss_resrv #( .FASTRAM (1) ) entries ( .clk (clk), - .waddr (tail_ptr), - .raddr (schedule_ptr), - .wren (push_new), + .waddr (allocate_id_r), + .raddr (dequeue_id_r), + 
.wren (allocate_valid), .byteen (1'b1), .rden (1'b1), - .din (enqueue_data), - .dout (schedule_data) + .din (allocate_data), + .dout (dequeue_data) ); - assign lookup_match = (| valid_address_match); - assign schedule_valid = valid_out_r; - assign schedule_addr = addr_table[schedule_ptr]; - assign enqueue_almfull = alm_full_r; - assign enqueue_full = full_r; + assign allocate_ready = allocate_rdy_r; + assign allocate_id = allocate_id_r; + + assign dequeue_valid = dequeue_val_r; + assign dequeue_id = dequeue_id_r; + assign dequeue_addr = addr_table[dequeue_id_r]; + + assign lookup_match = (| addr_match); + + `UNUSED_VAR (lookup_valid) `ifdef DBG_PRINT_CACHE_MSHR - always @(posedge clk) begin - if (lookup_fill || schedule || enqueue || dequeue) begin - if (schedule) - $display("%t: cache%0d:%0d mshr-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(schedule_addr, BANK_ID), deq_debug_wid, deq_debug_pc); - if (enqueue) begin - if (enqueue_is_mshr) - $display("%t: cache%0d:%0d mshr-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready); - else - $display("%t: cache%0d:%0d mshr-enqueue: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready, enq_debug_wid, enq_debug_pc); - end - if (dequeue) - $display("%t: cache%0d:%0d mshr-dequeue addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, enq_debug_wid, enq_debug_pc); + always @(posedge clk) begin + if (allocate_fire || fill_valid || dequeue_fire || lookup_replay || lookup_valid || release_valid) begin + if (allocate_fire) + $display("%t: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, + `LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_debug_wid, deq_debug_pc); + if (fill_valid) + $display("%t: cache%0d:%0d mshr-fill: addr=%0h, id=%0d", $time, CACHE_ID, 
BANK_ID, + `LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id); + if (dequeue_fire) + $display("%t: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, + `LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_debug_wid, deq_debug_pc); + if (lookup_replay) + $display("%t: cache%0d:%0d mshr-replay: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID, + `LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id); + if (lookup_valid) + $display("%t: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, + `LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lookup_match, lkp_debug_wid, lkp_debug_pc); + if (release_valid) + $display("%t: cache%0d:%0d mshr-release id=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, + release_id, rel_debug_wid, rel_debug_pc); $write("%t: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID); - for (integer j = 0; j < MSHR_SIZE; j++) begin - if (valid_table[j]) begin + for (integer i = 0; i < MSHR_SIZE; ++i) begin + if (valid_table[i]) begin $write(" "); - if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); - if (~ready_table[j]) $write("!"); - $write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID)); + if (ready_table[i]) + $write("*"); + $write("%0d=%0h", i, `LINE_TO_BYTE_ADDR(addr_table[i], BANK_ID)); end - end + end $write("\n"); end end diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index 7759cd13..3cd69384 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -23,6 +23,8 @@ module VX_tag_access #( input wire[`NW_BITS-1:0] debug_wid, `IGNORE_UNUSED_END `endif + + input wire stall, // read/fill input wire lookup, @@ -51,7 +53,7 @@ module VX_tag_access #( ) tag_store ( .clk(clk), .addr(line_addr), - .wren(fill), + .wren(fill && ~stall), .byteen(1'b1), .rden(1'b1), .din({!is_flush, line_tag}), @@ -60,18 +62,18 @@ module VX_tag_access #( assign tag_match = read_valid && (line_tag == read_tag); + 
`RUNTIME_ASSERT((~(fill && ~stall && ~is_flush) || ~tag_match), ("%t: redundant fill - addr=%0h, tag_id=%0h", $time, `LINE_TO_BYTE_ADDR(addr, BANK_ID), read_tag)) + `ifdef DBG_PRINT_CACHE_TAG always @(posedge clk) begin - if (fill) begin + if (fill && ~stall) begin if (is_flush) begin $display("%t: cache%0d:%0d tag-flush: addr=%0h, blk_addr=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr); end else begin $display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag, read_tag); - if (tag_match) begin - $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr, BANK_ID)); - end end - end else if (lookup) begin + end + if (lookup && ~stall) begin if (tag_match) begin $display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, line_tag); end else begin