From d9425cc484c35052a6741d4714904ccc595270d8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Jul 2021 11:59:49 -0700 Subject: [PATCH] cache elastic buffer optimization --- hw/rtl/cache/VX_bank.v | 90 ++++++++++++++++---------------- hw/rtl/cache/VX_cache.v | 35 +++++-------- hw/rtl/cache/VX_shared_mem.v | 99 +++++++++++++++++++----------------- 3 files changed, 108 insertions(+), 116 deletions(-) diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 8e54a00b..d82ef98d 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -50,7 +50,7 @@ module VX_bank #( `endif // Core Request - input wire [NUM_PORTS-1:0] core_req_valid, + input wire core_req_valid, input wire [NUM_PORTS-1:0] core_req_pmask, input wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel, input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen, @@ -97,10 +97,7 @@ module VX_bank #( `IGNORE_WARNINGS_END `endif - wire creq_pop; - wire creq_full; - wire creq_empty; - wire [NUM_PORTS-1:0] creq_pmask; + wire [NUM_PORTS-1:0] creq_pmask; wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] creq_wsel; wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen; wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data; @@ -108,26 +105,22 @@ module VX_bank #( wire creq_rw; wire [`LINE_ADDR_WIDTH-1:0] creq_addr; wire [CORE_TAG_WIDTH-1:0] creq_tag; - - wire creq_push = core_req_valid && core_req_ready; - assign core_req_ready = !creq_full; - VX_fifo_queue #( - .DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), + wire creq_out_valid, creq_out_ready; + + VX_elastic_buffer #( + .DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), .SIZE (CREQ_SIZE), .BUFFERED (1) ) core_req_queue ( .clk (clk), .reset (reset), - .push (creq_push), - .pop (creq_pop), + .ready_in (core_req_ready), + .valid_in (core_req_valid), .data_in ({core_req_tag, core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid}), .data_out ({creq_tag, creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid}), - .empty (creq_empty), - .full (creq_full), - `UNUSED_PIN (alm_empty), - `UNUSED_PIN (alm_full), - `UNUSED_PIN (size) + .ready_out (creq_out_ready), + .valid_out (creq_out_valid) ); wire mshr_alm_full; @@ -166,8 +159,8 @@ module VX_bank #( wire crsq_in_valid, crsq_in_ready, crsq_in_stall; wire mreq_alm_full; - wire mrsq_pop; + wire creq_out_fire = creq_out_valid && creq_out_ready; wire crsq_in_fire = crsq_in_valid && crsq_in_ready; VX_pending_size #( @@ -175,7 +168,7 @@ module VX_bank #( ) mshr_pending_size ( .clk (clk), .reset (reset), - .push (creq_pop && !creq_rw), + .push (creq_out_fire && !creq_rw), .pop (crsq_in_fire), .full (mshr_alm_full), `UNUSED_PIN (empty), @@ -183,26 +176,29 @@ module VX_bank #( ); // determine which queue to pop next in priority order - wire mshr_pop_unqual = mshr_valid - && !mreq_alm_full; // ensure memory request queue not full (deadlock prevention) - wire mrsq_pop_unqual = !mshr_pop_unqual && mem_rsp_valid; - wire creq_pop_unqual = !mshr_pop_unqual && !mrsq_pop_unqual && !creq_empty && !flush_enable; + wire mshr_grant = !mreq_alm_full; // ensure memory request queue not full (deadlock prevention) + wire mshr_enable = mshr_grant && mshr_valid; + + wire mrsq_grant = !mshr_enable; + wire mrsq_enable = mrsq_grant && mem_rsp_valid; + + wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable; wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1); - assign mshr_pop = mshr_pop_unqual + assign mshr_pop = mshr_enable && !(is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed - && !crsq_in_stall; // ensure core response ready + && !crsq_in_stall; // ensure core response ready - assign mrsq_pop = mrsq_pop_unqual - && !crsq_in_stall; // ensure core response ready + assign creq_out_ready = creq_grant + && !mreq_alm_full // ensure memory request ready + && !mshr_alm_full // ensure mshr enqueue ready + && !crsq_in_stall; // ensure core response ready - assign creq_pop = creq_pop_unqual - && !mreq_alm_full // ensure memory request ready - && !mshr_alm_full // ensure mshr enqueue ready - && !crsq_in_stall; // ensure core response ready + assign mem_rsp_ready = mrsq_grant + && !crsq_in_stall; // ensure core response ready - assign mem_rsp_ready = mrsq_pop; + wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; // we have a miss in mshr or entering it for the current address wire mshr_pending_sel = mshr_pending @@ -210,7 +206,7 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_sel, debug_wid_sel} = mshr_pop_unqual ? mshr_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS] : creq_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_pc_sel, debug_wid_sel} = mshr_enable ? mshr_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS] : creq_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; end else begin assign {debug_pc_sel, debug_wid_sel} = 0; end @@ -245,18 +241,18 @@ module VX_bank #( .reset (reset), .enable (!crsq_in_stall), .data_in ({ - flush_enable || mshr_pop || mrsq_pop || creq_pop, + flush_enable || mshr_pop || mem_rsp_fire || creq_out_fire, flush_enable, - mshr_pop_unqual, - mrsq_pop_unqual || flush_enable, - mshr_pop_unqual ? 1'b0 : creq_rw, - mshr_pop_unqual ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)), - mem_rsp_valid ? mem_rsp_data : creq_line_data, - mshr_pop_unqual ? mshr_wsel : creq_wsel, - mshr_pop_unqual ? mshr_byteen : creq_byteen, - mshr_pop_unqual ? mshr_tid : creq_tid, - mshr_pop_unqual ? mshr_pmask : creq_pmask, - mshr_pop_unqual ? mshr_tag : creq_tag, + mshr_enable, + mrsq_enable || flush_enable, + mshr_enable ? 1'b0 : creq_rw, + mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)), + mem_rsp_valid ? mem_rsp_data : creq_line_data, + mshr_enable ? mshr_wsel : creq_wsel, + mshr_enable ? mshr_byteen : creq_byteen, + mshr_enable ? mshr_tid : creq_tid, + mshr_enable ? mshr_pmask : creq_pmask, + mshr_enable ? mshr_tag : creq_tag, mshr_pending_sel }), .data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_pending_st0}) @@ -444,7 +440,7 @@ module VX_bank #( .lookup_match (mshr_pending), // fill update - .fill_update (mrsq_pop), + .fill_update (mem_rsp_fire), // schedule .schedule (mshr_pop), @@ -562,13 +558,13 @@ module VX_bank #( if (flush_enable) begin $display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID)); end - if (mrsq_pop) begin + if (mem_rsp_fire) begin $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_data); end if (mshr_pop) begin $display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, mshr_byteen, debug_wid_sel, debug_pc_sel); end - if (creq_pop) begin + if (creq_out_fire) begin if (creq_rw) $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel); else diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 581a8639..83fc69da 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -241,32 +241,25 @@ module VX_cache #( wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual; wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_nc_a, mem_rsp_tag_qual; - - wire mrsq_full, mrsq_empty; - wire mrsq_push, mrsq_pop; - assign mrsq_push = mem_rsp_valid_nc && mem_rsp_ready_nc; - assign mem_rsp_ready_nc = !mrsq_full; + wire mrsq_out_valid, mrsq_out_ready; // trim out shared memory and non-cacheable flags assign mem_rsp_tag_nc_a = mem_rsp_tag_nc[NC_ENABLE +: `MEM_ADDR_WIDTH]; - VX_fifo_queue #( + VX_elastic_buffer #( .DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH), .SIZE (MRSQ_SIZE), .BUFFERED (1) ) mem_rsp_queue ( .clk (clk), .reset (reset), - .push (mrsq_push), - .pop (mrsq_pop), + .ready_in (mem_rsp_ready_nc), + .valid_in (mem_rsp_valid_nc), .data_in ({mem_rsp_tag_nc_a, mem_rsp_data_nc}), - .data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}), - .empty (mrsq_empty), - .full (mrsq_full), - `UNUSED_PIN (alm_full), - `UNUSED_PIN (alm_empty), - `UNUSED_PIN (size) + .data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}), + .ready_out (mrsq_out_ready), + .valid_out (mrsq_out_valid) ); `UNUSED_VAR (mem_rsp_tag_nc) @@ -289,7 +282,7 @@ module VX_cache #( /////////////////////////////////////////////////////////////////////////// - wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid; + wire [NUM_BANKS-1:0] per_bank_core_req_valid; wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_pmask; wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel; wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen; @@ -318,9 +311,9 @@ module VX_cache #( if (NUM_BANKS == 1) begin `UNUSED_VAR (mem_rsp_tag_qual) - assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready; + assign mrsq_out_ready = per_bank_mem_rsp_ready; end else begin - assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)]; + assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)]; end VX_core_req_bank_sel #( @@ -360,7 +353,7 @@ module VX_cache #( /////////////////////////////////////////////////////////////////////////// for (genvar i = 0; i < NUM_BANKS; i++) begin - wire [NUM_PORTS-1:0] curr_bank_core_req_valid; + wire curr_bank_core_req_valid; wire [NUM_PORTS-1:0] curr_bank_core_req_pmask; wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] curr_bank_core_req_wsel; wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen; @@ -424,10 +417,10 @@ module VX_cache #( // Memory response if (NUM_BANKS == 1) begin - assign curr_bank_mem_rsp_valid = !mrsq_empty; + assign curr_bank_mem_rsp_valid = mrsq_out_valid; assign curr_bank_mem_rsp_addr = mem_rsp_tag_qual; end else begin - assign curr_bank_mem_rsp_valid = !mrsq_empty && (`MEM_ADDR_BANK(mem_rsp_tag_qual) == i); + assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_ADDR_BANK(mem_rsp_tag_qual) == i); assign curr_bank_mem_rsp_addr = `MEM_TO_LINE_ADDR(mem_rsp_tag_qual); end assign curr_bank_mem_rsp_data = mem_rsp_data_qual; @@ -464,7 +457,7 @@ module VX_cache #( // Core request .core_req_valid (curr_bank_core_req_valid), - .core_req_pmask (curr_bank_core_req_pmask), + .core_req_pmask (curr_bank_core_req_pmask), .core_req_rw (curr_bank_core_req_rw), .core_req_byteen (curr_bank_core_req_byteen), .core_req_addr (curr_bank_core_req_addr), diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index 04feb44d..94b030ec 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -107,19 +107,24 @@ module VX_shared_mem #( wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag; wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; - wire creq_push, creq_pop, creq_empty, creq_full; + wire creq_in_ready; + wire creq_out_valid; wire crsq_in_fire_last; - wire [NUM_BANKS-1:0] per_bank_rsp_valid = per_bank_core_req_valid & ~per_bank_core_req_rw; + wire [NUM_BANKS-1:0] per_bank_req_reads = per_bank_core_req_valid & ~per_bank_core_req_rw; - wire core_req_has_read = (| per_bank_rsp_valid); - - assign creq_push = (| core_req_valid) && ~creq_full; + wire per_bank_req_has_reads = (| per_bank_req_reads); - assign creq_pop = (~creq_empty && ~core_req_has_read) - || crsq_in_fire_last; + wire creq_in_valid = (| core_req_valid); + + wire creq_out_ready = ~per_bank_req_has_reads // is write only + || crsq_in_fire_last; // is sending last read response - assign per_bank_core_req_ready_unqual = ~creq_full; + assign per_bank_core_req_ready_unqual = creq_in_ready; + + wire creq_in_fire = creq_in_valid && creq_in_ready; + + wire creq_out_fire = creq_out_valid && creq_out_ready; wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual; `UNUSED_VAR (per_bank_core_req_addr_unqual) @@ -127,35 +132,33 @@ module VX_shared_mem #( assign per_bank_core_req_addr_qual[i] = per_bank_core_req_addr_unqual[i][`LINE_SELECT_BITS-1:0]; end - VX_fifo_queue #( + VX_elastic_buffer #( .DATAW (NUM_BANKS * (1 + 1 + `LINE_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS)), .SIZE (CREQ_SIZE), - .BUFFERED (1) + .BUFFERED (1) // output should be registered for the data_store addr port ) core_req_queue ( - .clk (clk), - .reset (reset), - .push (creq_push), - .pop (creq_pop), - .data_in ({per_bank_core_req_valid_unqual, - per_bank_core_req_rw_unqual, - per_bank_core_req_addr_qual, - per_bank_core_req_byteen_unqual, - per_bank_core_req_data_unqual, - per_bank_core_req_tag_unqual, - per_bank_core_req_tid_unqual}), - .data_out({per_bank_core_req_valid, - per_bank_core_req_rw, - per_bank_core_req_addr, - per_bank_core_req_byteen, - per_bank_core_req_data, - per_bank_core_req_tag, - per_bank_core_req_tid}), - .empty (creq_empty), - .full (creq_full), - `UNUSED_PIN (alm_empty), - `UNUSED_PIN (alm_full), - `UNUSED_PIN (size) + .clk (clk), + .reset (reset), + .ready_in (creq_in_ready), + .valid_in (creq_in_valid), + .data_in ({per_bank_core_req_valid_unqual, + per_bank_core_req_rw_unqual, + per_bank_core_req_addr_qual, + per_bank_core_req_byteen_unqual, + per_bank_core_req_data_unqual, + per_bank_core_req_tag_unqual, + per_bank_core_req_tid_unqual}), + .data_out ({per_bank_core_req_valid, + per_bank_core_req_rw, + per_bank_core_req_addr, + per_bank_core_req_byteen, + per_bank_core_req_data, + per_bank_core_req_tag, + per_bank_core_req_tid}), + .ready_out (creq_out_ready), + .valid_out (creq_out_valid) ); + `UNUSED_VAR (creq_in_fire) wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data; @@ -163,14 +166,14 @@ module VX_shared_mem #( wire wren = per_bank_core_req_rw[i] && per_bank_core_req_valid[i] - && creq_pop; + && creq_out_fire; VX_sp_ram #( .DATAW (`WORD_WIDTH), .SIZE (`LINES_PER_BANK), .BYTEENW (WORD_SIZE), .RWCHECK (1) - ) data ( + ) data_store ( .clk (clk), .addr (per_bank_core_req_addr[i]), .wren (wren), @@ -187,23 +190,23 @@ module VX_shared_mem #( wire crsq_in_valid, crsq_in_ready; - reg [NUM_BANKS-1:0] bank_rsp_sel, bank_rsp_sel_r; + reg [NUM_BANKS-1:0] bank_rsp_sel_prv, bank_rsp_sel_cur; - wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel | bank_rsp_sel_r; + wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel_prv | bank_rsp_sel_cur; wire crsq_in_fire = crsq_in_valid && crsq_in_ready; - assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_rsp_valid); + assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_req_reads); always @(posedge clk) begin if (reset) begin - bank_rsp_sel <= 0; + bank_rsp_sel_prv <= 0; end else begin if (crsq_in_fire) begin - if (bank_rsp_sel_n == per_bank_rsp_valid) begin - bank_rsp_sel <= 0; + if (bank_rsp_sel_n == per_bank_req_reads) begin + bank_rsp_sel_prv <= 0; end else begin - bank_rsp_sel <= bank_rsp_sel_n; + bank_rsp_sel_prv <= bank_rsp_sel_n; end end end @@ -217,10 +220,10 @@ module VX_shared_mem #( core_rsp_valids_in = 0; core_rsp_data_in = 'x; core_rsp_tag_in = 'x; - bank_rsp_sel_r = 0; + bank_rsp_sel_cur = 0; for (integer i = NUM_BANKS-1; i >= 0; --i) begin - if (per_bank_rsp_valid[i] && ~bank_rsp_sel[i]) begin + if (per_bank_req_reads[i] && ~bank_rsp_sel_prv[i]) begin core_rsp_tag_in = per_bank_core_req_tag[i]; end end @@ -230,12 +233,12 @@ module VX_shared_mem #( && (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin core_rsp_valids_in[per_bank_core_req_tid[i]] = 1; core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i]; - bank_rsp_sel_r[i] = 1; + bank_rsp_sel_cur[i] = 1; end end end - assign crsq_in_valid = ~creq_empty && core_req_has_read; + assign crsq_in_valid = creq_out_valid && per_bank_req_has_reads; VX_skid_buffer #( .DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH) @@ -297,7 +300,7 @@ module VX_shared_mem #( if (is_multi_tag_req) begin $display("%t: *** cache%0d multi-tag request!", $time, CACHE_ID); end - if (creq_push) begin + if (creq_in_fire) begin for (integer i = 0; i < NUM_BANKS; ++i) begin if (per_bank_core_req_valid_unqual[i]) begin if (per_bank_core_req_rw_unqual[i]) begin @@ -312,7 +315,7 @@ module VX_shared_mem #( end end end - if (creq_pop) begin + if (creq_out_fire) begin for (integer i = 0; i < NUM_BANKS; ++i) begin if (per_bank_core_req_valid[i]) begin if (per_bank_core_req_rw[i]) begin