Patch notes for hw/rtl/cache/VX_shared_mem.v (module VX_shared_mem)

Purpose: let the shared memory answer a core-request-queue entry whose banks carry
different tag IDs by sending the read responses one tag at a time.

What the hunks do:
  * Parameter defaults: CACHE_SIZE written as (1024*16); NUM_BANKS 4 -> 2; NUM_REQS
    NUM_BANKS -> 4; CREQ_SIZE 4 -> 8; CORE_TAG_ID_BITS 0 -> 8; CORE_TAG_WIDTH
    1 -> (2 + CORE_TAG_ID_BITS); BANK_ADDR_OFFSET 0 -> `CLOG2(256).
  * per_bank_core_req_tag and per_bank_core_req_addr_qual are dimensioned by NUM_BANKS
    instead of NUM_REQS.
  * per_bank_rsp_valid marks banks holding a valid read (valid & ~rw). bank_rsp_sel
    registers the banks already answered for the current queue entry. Each response
    carries the tag of the lowest-indexed unanswered read bank, and bank_rsp_sel_r marks
    every bank sent with that tag ID in the same beat.
  * creq_pop asserts when the entry has no read at all, or when the accepted response
    completes the set (bank_rsp_sel_n == per_bank_rsp_valid).
  * Debug: per-bank debug_pc/debug_wid wires for the unqualified (st0) and queued (st1)
    tags, per-bank $display on push and pop, and a "multi-tag request" message.

Review fix applied in this revision:
  * The response-gather loop now qualifies banks with per_bank_rsp_valid[i] rather than
    per_bank_core_req_valid[i]. With the latter, a valid write bank whose tag ID matched
    the selected tag set bank_rsp_sel_r[i] although that bit is 0 in per_bank_rsp_valid,
    so bank_rsp_sel_n could never equal per_bank_rsp_valid, crsq_in_fire_last stayed low
    and creq_pop never asserted for that entry. The write bank was also reported in
    core_rsp_valids_in.
  * Comment wording: "at the time" -> "at a time".

NOTE(review): VX_skid_buffer DATAW is computed from NUM_BANKS while core_rsp_valids_in and
core_rsp_data_in are NUM_REQS wide; the new defaults make NUM_REQS != NUM_BANKS. The port
connections are outside the visible hunks - confirm the width still matches.
NOTE(review): the tag compare slices [CORE_TAG_ID_BITS-1:0]; presumably CORE_TAG_ID_BITS
is never overridden to 0 - confirm against instantiations.

diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index e9b867d7..3ebf485d 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -4,25 +4,25 @@ module VX_shared_mem #( parameter CACHE_ID = 0, // Size of cache in bytes - parameter CACHE_SIZE = 16384, + parameter CACHE_SIZE = (1024*16), // Number of banks - parameter NUM_BANKS = 4, + parameter NUM_BANKS = 2, // Size of a word in bytes parameter WORD_SIZE = 4, // Number of Word requests per cycle - parameter NUM_REQS = NUM_BANKS, + parameter NUM_REQS = 4, // Core Request Queue Size - parameter CREQ_SIZE = 4, - - // core request tag size - parameter CORE_TAG_WIDTH = 1, + parameter CREQ_SIZE = 8, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0, + parameter CORE_TAG_ID_BITS = 8, + + // core request tag size + parameter CORE_TAG_WIDTH = (2 + CORE_TAG_ID_BITS), // bank offset from beginning of index range - parameter BANK_ADDR_OFFSET = 0 + parameter BANK_ADDR_OFFSET = `CLOG2(256) ) ( input wire clk, input wire reset, @@ -54,13 +54,6 @@ module VX_shared_mem #( localparam CACHE_LINE_SIZE = WORD_SIZE; -`ifdef DBG_CACHE_REQ_INFO - /* verilator lint_off UNUSED */ - wire [31:0] debug_pc_st0; - wire [`NW_BITS-1:0] debug_wid_st0; - /* verilator lint_on UNUSED */ -`endif - wire [NUM_BANKS-1:0] per_bank_core_req_valid_unqual; wire [NUM_BANKS-1:0] per_bank_core_req_rw_unqual; wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_unqual; @@ -109,20 +102,26 @@ module VX_shared_mem #( wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr; wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen; wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data; - wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag; + wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag; wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; wire creq_push, creq_pop, creq_empty, creq_full; - wire crsq_in_ready; + wire 
crsq_in_fire_last; + + wire [NUM_BANKS-1:0] per_bank_rsp_valid = per_bank_core_req_valid & ~per_bank_core_req_rw; + + wire core_req_has_read = (| per_bank_rsp_valid); - assign creq_push = (| core_req_valid) && !creq_full; - assign creq_pop = ~creq_empty && crsq_in_ready; + assign creq_push = (| core_req_valid) && ~creq_full; + + assign creq_pop = (~creq_empty && ~core_req_has_read) + || crsq_in_fire_last; assign per_bank_core_req_ready_unqual = ~creq_full; - wire [NUM_REQS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual; + wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual; `UNUSED_VAR (per_bank_core_req_addr_unqual) - for (genvar i = 0; i < NUM_REQS; i++) begin + for (genvar i = 0; i < NUM_BANKS; i++) begin assign per_bank_core_req_addr_qual[i] = per_bank_core_req_addr_unqual[i][`LINE_SELECT_BITS-1:0]; end @@ -179,6 +178,34 @@ module VX_shared_mem #( .dout (per_bank_core_rsp_data[i]) ); end + + // The core response bus handles a single tag at a time + // We first need to select the current tag to process, + // then send all bank responses for that tag as a batch + + wire crsq_in_valid, crsq_in_ready; + + reg [NUM_BANKS-1:0] bank_rsp_sel, bank_rsp_sel_r; + + wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel | bank_rsp_sel_r; + + wire crsq_in_fire = crsq_in_valid && crsq_in_ready; + + assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_rsp_valid); + + always @(posedge clk) begin + if (reset) begin + bank_rsp_sel <= 0; + end else begin + if (crsq_in_fire) begin + if (bank_rsp_sel_n == per_bank_rsp_valid) begin + bank_rsp_sel <= 0; + end else begin + bank_rsp_sel <= bank_rsp_sel_n; + end + end + end + end reg [NUM_REQS-1:0] core_rsp_valids_in; reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in; @@ -186,31 +213,30 @@ module VX_shared_mem #( always @(*) begin core_rsp_valids_in = 0; - core_rsp_data_in = 'x; + core_rsp_data_in = 'x; core_rsp_tag_in = 'x; - for (integer i = 0; i < NUM_BANKS; i++) begin - if 
(per_bank_core_req_valid[i]) begin - core_rsp_valids_in[per_bank_core_req_tid[i]] = 1; - core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i]; + bank_rsp_sel_r = 0; + + for (integer i = NUM_BANKS-1; i >= 0; --i) begin + if (per_bank_rsp_valid[i] && ~bank_rsp_sel[i]) begin core_rsp_tag_in = per_bank_core_req_tag[i]; end end - end - -`ifdef DBG_CACHE_REQ_INFO - if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_in[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; - end else begin - assign {debug_pc_st0, debug_wid_st0} = 0; + + for (integer i = 0; i < NUM_BANKS; i++) begin + if (per_bank_rsp_valid[i] + && (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin + core_rsp_valids_in[per_bank_core_req_tid[i]] = 1; + core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i]; + bank_rsp_sel_r[i] = 1; + end + end end -`endif - + wire [NUM_REQS-1:0] core_rsp_valids_out; wire core_rsp_valid_out; - wire core_rsp_rw = | (per_bank_core_req_valid & per_bank_core_req_rw); - - wire crsq_in_valid = ~creq_empty && ~core_rsp_rw; + assign crsq_in_valid = ~creq_empty && core_req_has_read; VX_skid_buffer #( .DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH) @@ -227,25 +253,82 @@ module VX_shared_mem #( assign core_rsp_valid = core_rsp_valids_out & {NUM_REQS{core_rsp_valid_out}}; +`ifdef DBG_CACHE_REQ_INFO +`IGNORE_WARNINGS_BEGIN + wire [NUM_BANKS-1:0][31:0] debug_pc_st0, debug_pc_st1; + wire [NUM_BANKS-1:0][`NW_BITS-1:0] debug_wid_st0, debug_wid_st1; +`IGNORE_WARNINGS_END + + for (genvar i = 0; i < NUM_BANKS; ++i) begin + if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin + assign {debug_pc_st0[i], debug_wid_st0[i]} = per_bank_core_req_tag_unqual[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_pc_st1[i], debug_wid_st1[i]} = per_bank_core_req_tag[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + end else begin + assign 
{debug_pc_st0[i], debug_wid_st0[i]} = 0; + assign {debug_pc_st1[i], debug_wid_st1[i]} = 0; + end + end +`endif + `ifdef DBG_PRINT_CACHE_BANK + + reg is_multi_tag_req; +`IGNORE_WARNINGS_BEGIN + reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel; +`IGNORE_WARNINGS_END + + always @(*) begin + core_req_tag_sel ='x; + for (integer i = NUM_BANKS-1; i >= 0; --i) begin + if (per_bank_core_req_valid[i]) begin + core_req_tag_sel = per_bank_core_req_tag[i]; + end + end + is_multi_tag_req = 0; + for (integer i = 0; i < NUM_BANKS; ++i) begin + if (per_bank_core_req_valid[i] + && (core_req_tag_sel[CORE_TAG_ID_BITS-1:0] != per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin + is_multi_tag_req = !creq_empty; + end + end + end + always @(posedge clk) begin if (!crsq_in_ready) begin - $display("%t: cache%0d pipeline-stall", $time, CACHE_ID); + $display("%t: *** cache%0d pipeline-stall", $time, CACHE_ID); + end + if (is_multi_tag_req) begin + $display("%t: *** cache%0d multi-tag request!", $time, CACHE_ID); + end + if (creq_push) begin + for (integer i = 0; i < NUM_BANKS; ++i) begin + if (per_bank_core_req_valid_unqual[i]) begin + if (per_bank_core_req_rw_unqual[i]) begin + $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h", + $time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i], + debug_wid_st0[i], debug_pc_st0[i]); + end else begin + $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h", + $time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], + debug_wid_st0[i], debug_pc_st0[i]); + end + end + end end if (creq_pop) begin - if (core_rsp_rw) begin - $write("%t: cache%0d core-wr-req: tmask=%0b, addr=", $time, CACHE_ID, per_bank_core_req_valid); - end else begin - $write("%t: cache%0d core-rd-req: tmask=%0b, addr=", $time, CACHE_ID, 
per_bank_core_req_valid); + for (integer i = 0; i < NUM_BANKS; ++i) begin + if (per_bank_core_req_valid[i]) begin + if (per_bank_core_req_rw[i]) begin + $display("%t: cache%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h", + $time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_req_data[i], + debug_wid_st1[i], debug_pc_st1[i]); + end else begin + $display("%t: cache%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h", + $time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], + debug_wid_st1[i], debug_pc_st1[i]); + end + end end - `PRINT_ARRAY1D(per_bank_core_req_addr, `NUM_THREADS); - $write(", tag=%0h, byteen=%b, data=", per_bank_core_req_tag, per_bank_core_req_byteen); - if (core_rsp_rw) begin - `PRINT_ARRAY1D(per_bank_core_req_data, `NUM_THREADS); - end else begin - `PRINT_ARRAY1D(per_bank_core_rsp_data, `NUM_THREADS); - end - $write(", wid=%0d, PC=%0h\n", debug_wid_st0, debug_pc_st0); end end `endif