cache bank pipeline optimization

This commit is contained in:
Blaise Tine
2021-09-14 02:09:35 -07:00
parent 3d7baf1640
commit 4e8293c3e3
5 changed files with 138 additions and 148 deletions

168
hw/rtl/cache/VX_bank.v vendored
View File

@@ -129,11 +129,13 @@ module VX_bank #(
.data_out ({creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid, creq_tag}),
.ready_out (creq_ready),
.valid_out (creq_valid)
);
);
wire mshr_valid;
wire mshr_ready;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id;
wire mshr_alm_full;
wire mshr_valid;
wire [MSHR_ADDR_WIDTH-1:0] mshr_dequeue_id;
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] mshr_tag;
@@ -142,7 +144,8 @@ module VX_bank #(
wire [NUM_PORTS-1:0] mshr_pmask;
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
wire write_st0, write_st1;
wire is_read_st0, is_read_st1;
wire is_write_st0, is_write_st1;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel_st0, wsel_st1;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
@@ -160,33 +163,36 @@ module VX_bank #(
wire crsq_valid, crsq_ready, crsq_stall;
wire mreq_alm_full;
// prevent read-during-write hazard when accessing tags/data block RAMs
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
wire rdw_write_hazard = valid_st0 && write_st0 && ~creq_rw;
wire rdw_write_hazard = valid_st0 && is_write_st0 && ~creq_rw;
// determine which queue to pop next in priority order
wire mshr_grant = 1;
wire mshr_grant = !flush_enable;
wire mshr_enable = mshr_grant && mshr_valid;
wire mrsq_grant = !mshr_enable;
wire mrsq_grant = !flush_enable && !mshr_enable;
wire mrsq_enable = mrsq_grant && mem_rsp_valid;
wire creq_grant = !flush_enable && !mshr_enable && !mrsq_enable;
wire creq_enable = creq_grant && creq_valid;
wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable;
wire mshr_ready = mshr_grant
&& !rdw_fill_hazard // prevent read-during-write hazard
&& !crsq_stall; // ensure core response ready
assign mshr_ready = mshr_grant
&& !rdw_fill_hazard // prevent read-during-write hazard
&& !crsq_stall; // ensure core_rsp_queue not full
assign mem_rsp_ready = mrsq_grant
&& !crsq_stall; // ensure core response ready
&& !crsq_stall; // ensure core_rsp_queue not full
assign creq_ready = creq_grant
&& !rdw_write_hazard // prevent read-during-write hazard
&& !mreq_alm_full // ensure memory request ready
&& !mshr_alm_full // ensure mshr enqueue ready
&& !crsq_stall; // ensure core response ready
&& !mreq_alm_full // ensure mem_req_queue not full
&& !mshr_alm_full // ensure mshr not full
&& !crsq_stall; // ensure core_rsp_queue not full
wire flush_fire = flush_enable;
wire mshr_fire = mshr_valid && mshr_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire creq_fire = creq_valid && creq_ready;
@@ -206,28 +212,29 @@ module VX_bank #(
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (!crsq_stall),
.data_in ({
flush_enable || mshr_fire || mem_rsp_fire || creq_fire,
flush_fire || mshr_fire || mem_rsp_fire || creq_fire,
flush_enable,
mrsq_enable || flush_enable,
mshr_enable,
creq_fire && creq_rw,
mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
mshr_valid,
mrsq_enable,
creq_enable && ~creq_rw,
creq_enable && creq_rw,
flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : (mshr_valid ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : creq_addr)),
wdata_sel,
mshr_enable ? mshr_wsel : creq_wsel,
mshr_valid ? mshr_wsel : creq_wsel,
creq_byteen,
mshr_enable ? mshr_tid : creq_tid,
mshr_enable ? mshr_pmask : creq_pmask,
mshr_enable ? mshr_tag : creq_tag,
mshr_enable ? mshr_dequeue_id : mem_rsp_id
mshr_valid ? mshr_tid : creq_tid,
mshr_valid ? mshr_pmask : creq_pmask,
mshr_valid ? mshr_tag : creq_tag,
mshr_valid ? mshr_dequeue_id : mem_rsp_id
}),
.data_out ({valid_st0, is_flush_st0, is_fill_st0, is_mshr_st0, write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
.data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
);
`ifdef DBG_CACHE_REQ_INFO
@@ -238,8 +245,9 @@ module VX_bank #(
end
`endif
wire do_lookup_st0 = valid_st0 && ~is_fill_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_flush_st0);
wire tag_match_st0;
@@ -252,39 +260,37 @@ module VX_bank #(
.WORD_SIZE (WORD_SIZE),
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
) tag_access (
.clk (clk),
.reset (reset),
.clk (clk),
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st0),
.debug_wid (debug_wid_st0),
.debug_pc (debug_pc_st0),
.debug_wid (debug_wid_st0),
`endif
.stall (crsq_stall),
.stall (crsq_stall),
// read/Fill
.lookup (do_lookup_st0),
.addr (addr_st0),
.fill (do_fill_st0),
.is_flush (is_flush_st0),
.tag_match (tag_match_st0)
.lookup (do_lookup_st0),
.addr (addr_st0),
.fill (do_fill_st0),
.flush (do_flush_st0),
.tag_match (tag_match_st0)
);
// we have a core request hit
assign miss_st0 = !is_fill_st0 && !tag_match_st0;
wire miss_st0 = (is_read_st0 || is_write_st0) && ~tag_match_st0;
wire read_st0 = !is_fill_st0 && !write_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_qual_st0 = (!is_fill_st0 && !is_mshr_st0) ? mshr_alloc_id : mshr_id_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_a_st0 = (is_read_st0 || is_write_st0) ? mshr_alloc_id : mshr_id_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (!crsq_stall),
.data_in ({valid_st0, is_fill_st0, is_mshr_st0, miss_st0, write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_qual_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_fill_st1, is_mshr_st1, miss_st1, write_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, miss_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_a_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, miss_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
);
`ifdef DBG_CACHE_REQ_INFO
@@ -295,18 +301,15 @@ module VX_bank #(
end
`endif
wire read_st1 = !is_fill_st1 && !write_st1;
wire writeen_st1 = (WRITE_ENABLE && write_st1 && !miss_st1)
|| is_fill_st1;
wire crsq_push_st1 = read_st1 && !miss_st1;
wire mreq_push_st1 = (read_st1 && miss_st1 && !mshr_pending_st1)
|| write_st1;
wire do_read_st0 = valid_st0 && is_read_st0;
wire do_read_st1 = valid_st1 && is_read_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_write_st1 = valid_st1 && is_write_st1;
wire do_mshr_st1 = valid_st1 && is_mshr_st1;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data_st1 = wdata_st1[0 +: NUM_PORTS * `WORD_WIDTH];
`UNUSED_VAR (wdata_st1)
VX_data_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
@@ -321,32 +324,27 @@ module VX_bank #(
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st1),
.debug_wid (debug_wid_st1),
.debug_pc_st1 (debug_pc_st1),
.debug_wid_st1 (debug_wid_st1),
`endif
.stall (crsq_stall),
.read (do_read_st1 || do_mshr_st1),
.fill (do_fill_st1),
.write (do_write_st1 && !miss_st1),
.addr (addr_st1),
.wsel (wsel_st1),
.pmask (pmask_st1),
// reading
.readen (valid_st1 && read_st1),
.read_data (rdata_st1),
// writing
.writeen (valid_st1 && writeen_st1),
.is_fill (is_fill_st1),
.byteen (byteen_st1),
.fill_data (wdata_st1),
.write_data (creq_data_st1),
.fill_data (wdata_st1)
.read_data (rdata_st1)
);
wire mshr_allocate = valid_st0 && read_st0 && !is_mshr_st0 && !crsq_stall;
wire mshr_replay = do_fill_st0 && ~crsq_stall;
wire mshr_allocate = do_read_st0 && !crsq_stall;
wire mshr_replay = do_fill_st0 && !crsq_stall;
wire mshr_lookup = mshr_allocate;
wire mshr_release = valid_st1 && read_st1 && !is_mshr_st1 && !miss_st1 && !crsq_stall;
wire mshr_release = do_read_st1 && !miss_st1 && !crsq_stall;
VX_pending_size #(
.SIZE (MSHR_SIZE)
@@ -414,13 +412,15 @@ module VX_bank #(
);
// Enqueue core response
wire [NUM_PORTS-1:0] crsq_pmask;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] crsq_tag;
assign crsq_valid = (do_read_st1 && !miss_st1)
|| do_mshr_st1;
assign crsq_valid = valid_st1 && crsq_push_st1;
assign crsq_stall = crsq_valid && !crsq_ready;
assign crsq_pmask = pmask_st1;
@@ -445,20 +445,21 @@ module VX_bank #(
// Enqueue memory request
wire mreq_push, mreq_pop, mreq_empty;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mreq_wsel;
wire [NUM_PORTS-1:0] mreq_pmask;
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
wire mreq_rw;
wire mreq_push, mreq_pop, mreq_empty, mreq_rw;
assign mreq_push = valid_st1 && mreq_push_st1;
assign mreq_push = (do_read_st1 && miss_st1 && !mshr_pending_st1)
|| do_write_st1;
assign mreq_pop = mem_req_valid && mem_req_ready;
assign mreq_rw = WRITE_ENABLE && write_st1;
assign mreq_rw = WRITE_ENABLE && is_write_st1;
assign mreq_addr = addr_st1;
assign mreq_id = mshr_id_st1;
assign mreq_pmask= pmask_st1;
@@ -484,7 +485,9 @@ module VX_bank #(
`UNUSED_PIN (size)
);
assign mem_req_valid = !mreq_empty;
assign mem_req_valid = !mreq_empty;
///////////////////////////////////////////////////////////////////////////////
`SCOPE_ASSIGN (valid_st0, valid_st0);
`SCOPE_ASSIGN (valid_st1, valid_st1);
@@ -498,15 +501,14 @@ module VX_bank #(
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`ifdef PERF_ENABLE
assign perf_read_misses = valid_st1 && read_st1 && !is_mshr_st1 && miss_st1;
assign perf_write_misses = valid_st1 && write_st1 && !is_mshr_st1 && miss_st1;
assign perf_read_misses = do_read_st1 && miss_st1;
assign perf_write_misses = do_write_st1 && miss_st1;
assign perf_pipe_stalls = crsq_stall || mreq_alm_full || mshr_alm_full;
assign perf_mshr_stalls = mshr_alm_full;
`endif
`ifdef DBG_PRINT_CACHE_BANK
wire crsq_fire = crsq_valid && crsq_ready;
wire pipeline_stall = (mshr_valid || mem_rsp_valid || creq_valid)
&& ~(mshr_fire || mem_rsp_fire || creq_fire);
@@ -533,7 +535,7 @@ module VX_bank #(
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
end
if (mreq_push) begin
if (write_st1)
if (is_write_st1)
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1);
else
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, debug_wid_st1, debug_pc_st1);