Merge branch 'master' into graphics

This commit is contained in:
Blaise Tine
2021-10-15 19:32:11 -07:00
542 changed files with 124552 additions and 18682 deletions

548
hw/rtl/cache/VX_bank.sv vendored Normal file
View File

@@ -0,0 +1,548 @@
`include "VX_cache_define.vh"
// VX_bank: one bank of the cache.
//
// Requests enter a two-stage pipeline (st0: tag access, st1: data access) from one of
// four sources, arbitrated in fixed priority order:
//   flush  >  miss-reservation (MSHR) dequeue  >  memory fill response  >  core request.
// Stage 1 enqueues core responses (read hits and MSHR dequeues) and memory requests
// (read misses and writes). The whole pipeline is held while the core response
// queue cannot accept a pending response (crsq_stall).
module VX_bank #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Size of cache in bytes
parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per bank
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Core Request Queue Size
parameter CREQ_SIZE = 1,
// Core Response Queue Size
parameter CRSQ_SIZE = 1,
// Miss reservation queue (MSHR) size
parameter MSHR_SIZE = 1,
// Memory Request Queue Size
parameter MREQ_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0,
// width of an MSHR entry id, derived from MSHR_SIZE
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
// width of a per-port word-select field
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
`SCOPE_IO_VX_bank
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
// performance event strobes (driven at the bottom of the module)
output wire perf_read_misses,
output wire perf_write_misses,
output wire perf_mshr_stalls,
output wire perf_pipe_stalls,
`endif
// Core Request
// (one line address and one rw flag per request; the remaining fields are per port)
input wire core_req_valid,
input wire [NUM_PORTS-1:0] core_req_pmask,
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] core_req_wsel,
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_req_tid,
input wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
input wire core_req_rw,
input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr,
output wire core_req_ready,
// Core Response
output wire core_rsp_valid,
output wire [NUM_PORTS-1:0] core_rsp_pmask,
output wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_rsp_tid,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready,
// Memory request
// (mem_req_id carries the MSHR id that travelled with the request through stage 1)
output wire mem_req_valid,
output wire mem_req_rw,
output wire [NUM_PORTS-1:0] mem_req_pmask,
output wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen,
output wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel,
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data,
input wire mem_req_ready,
// Memory response
// (identified by id only; the line address is recovered from miss_resrv as mem_rsp_addr)
input wire mem_rsp_valid,
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
output wire mem_rsp_ready,
// flush
input wire flush_enable,
input wire [`LINE_SELECT_BITS-1:0] flush_addr
);
`UNUSED_PARAM (CORE_TAG_ID_BITS)
`ifdef DBG_CACHE_REQ_INFO
// debug info (warp id, PC) for the selected request and for each pipeline stage
`IGNORE_UNUSED_BEGIN
wire [31:0] debug_pc_sel, debug_pc_st0, debug_pc_st1;
wire [`NW_BITS-1:0] debug_wid_sel, debug_wid_st0, debug_wid_st1;
`IGNORE_UNUSED_END
`endif
// core request as seen at the output of the input queue
wire [NUM_PORTS-1:0] creq_pmask;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] creq_wsel;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] creq_tid;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] creq_tag;
wire creq_rw;
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
wire creq_valid, creq_ready;
// input queue buffering core requests ahead of the pipeline;
// DATAW must equal the total width of the data_in/data_out concatenations
VX_elastic_buffer #(
.DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH)),
.SIZE (CREQ_SIZE)
) core_req_queue (
.clk (clk),
.reset (reset),
.ready_in (core_req_ready),
.valid_in (core_req_valid),
.data_in ({core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid, core_req_tag}),
.data_out ({creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid, creq_tag}),
.ready_out (creq_ready),
.valid_out (creq_valid)
);
// back-pressure signals and the miss_resrv (MSHR) dequeue interface
wire mreq_alm_full;
wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
wire crsq_valid, crsq_ready;
wire crsq_stall;
wire mshr_valid;
wire mshr_ready;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id;
wire mshr_alm_full;
wire [MSHR_ADDR_WIDTH-1:0] mshr_dequeue_id;
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] mshr_tag;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mshr_wsel;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] mshr_tid;
wire [NUM_PORTS-1:0] mshr_pmask;
// pipeline state: *_st0 signals are outputs of pipe_reg0, *_st1 signals are outputs of pipe_reg1
// (miss_st0 and mshr_pending_st0 are computed during stage 0 and registered into stage 1)
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
wire is_read_st0, is_read_st1;
wire is_write_st0, is_write_st1;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel_st0, wsel_st1;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st1;
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
wire valid_st0, valid_st1;
wire is_fill_st0, is_fill_st1;
wire is_mshr_st0, is_mshr_st1;
wire miss_st0, miss_st1;
wire is_flush_st0;
wire mshr_pending_st0, mshr_pending_st1;
// prevent read-during-write hazard when accessing tags/data block RAMs
// - fill hazard: a fill is in stage 0, so an MSHR dequeue must wait
// - write hazard: a write is in stage 0 and the next core request is a read
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
wire rdw_write_hazard = valid_st0 && is_write_st0 && ~creq_rw;
// determine which queue to pop next in priority order
// (flush > MSHR dequeue > memory response > core request; at most one *_enable is set)
wire mshr_grant = !flush_enable;
wire mshr_enable = mshr_grant && mshr_valid;
wire mrsq_grant = !flush_enable && !mshr_enable;
wire mrsq_enable = mrsq_grant && mem_rsp_valid;
wire creq_grant = !flush_enable && !mshr_enable && !mrsq_enable;
wire creq_enable = creq_grant && creq_valid;
// ready = granted by the arbiter and not blocked by a hazard or downstream back-pressure
assign mshr_ready = mshr_grant
&& !rdw_fill_hazard // prevent read-during-write hazard
&& !crsq_stall; // ensure core_rsp_queue not full
assign mem_rsp_ready = mrsq_grant
&& !crsq_stall; // ensure core_rsp_queue not full
assign creq_ready = creq_grant
&& !rdw_write_hazard // prevent read-during-write hazard
&& !mreq_alm_full // ensure mem_req_queue not full
&& !mshr_alm_full // ensure mshr not full
&& !crsq_stall; // ensure core_rsp_queue not full
// a source fires when it is valid and accepted in this cycle;
// flush has no ready handshake here and fires whenever flush_enable is set
wire flush_fire = flush_enable;
wire mshr_fire = mshr_valid && mshr_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire creq_fire = creq_valid && creq_ready;
`ifdef DBG_CACHE_REQ_INFO
// debug info of the selected request, taken from the tag of port 0
// (only available when the tag carries more than the tag id bits)
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[0][`CACHE_REQ_INFO_RNG] : creq_tag[0][`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_wid_sel, debug_pc_sel} = 0;
end
`endif
// line-wide data entering the pipeline:
// - low NUM_PORTS words: memory response data when a response is valid (or the cache
//   is not writeable), otherwise the core request write data
// - remaining upper bits: always taken from the memory response data
wire [`CACHE_LINE_WIDTH-1:0] wdata_sel;
assign wdata_sel[(NUM_PORTS * `WORD_WIDTH)-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[(NUM_PORTS * `WORD_WIDTH)-1:0] : creq_data;
for (genvar i = NUM_PORTS * `WORD_WIDTH; i < `CACHE_LINE_WIDTH; ++i) begin
assign wdata_sel[i] = mem_rsp_data[i];
end
// stage 0 register, held while the core response queue stalls.
// The data_in field order must match data_out, and DATAW must equal their total width.
// NOTE(review): RESETW=1 presumably resets only the leading valid bit - confirm against VX_pipe_register.
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (!crsq_stall),
.data_in ({
// valid, is_flush, is_mshr, is_fill, is_read, is_write
flush_fire || mshr_fire || mem_rsp_fire || creq_fire,
flush_enable,
mshr_enable,
mrsq_enable,
creq_enable && ~creq_rw,
creq_enable && creq_rw,
// address of the selected source, in the same priority order as the arbiter
flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : (mshr_valid ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : creq_addr)),
wdata_sel,
// per-port request info: from the MSHR entry when one is valid, else from the core request
mshr_valid ? mshr_wsel : creq_wsel,
creq_byteen,
mshr_valid ? mshr_tid : creq_tid,
mshr_valid ? mshr_pmask : creq_pmask,
mshr_valid ? mshr_tag : creq_tag,
// MSHR id: dequeued entry id, or the id carried by the memory response
mshr_valid ? mshr_dequeue_id : mem_rsp_id
}),
.data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
);
`ifdef DBG_CACHE_REQ_INFO
// debug info of the request in stage 0
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_wid_st0, debug_pc_st0} = tag_st0[0][`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_wid_st0, debug_pc_st0} = 0;
end
`endif
// stage 0 operation decode: anything that is neither a fill nor a flush performs a tag lookup
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_flush_st0);
wire tag_match_st0;
// tag store access: receives the lookup, fill and flush strobes for addr_st0
// and reports tag_match
VX_tag_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
) tag_access (
.clk (clk),
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st0),
.debug_wid (debug_wid_st0),
`endif
.stall (crsq_stall),
// read/Fill
.lookup (do_lookup_st0),
.addr (addr_st0),
.fill (do_fill_st0),
.flush (do_flush_st0),
.tag_match (tag_match_st0)
);
// a core read/write request misses when its tag lookup does not match
// (fills, flushes and MSHR dequeues never count as a miss)
assign miss_st0 = (is_read_st0 || is_write_st0) && ~tag_match_st0;
// core requests carry the MSHR id being allocated for them; all other requests
// keep the id they entered the pipeline with
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_a_st0 = (is_read_st0 || is_write_st0) ? mshr_alloc_id : mshr_id_st0;
// stage 1 register, held while the core response queue stalls.
// The data_in field order must match data_out, and DATAW must equal their total width.
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (!crsq_stall),
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, miss_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_a_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, miss_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
);
`ifdef DBG_CACHE_REQ_INFO
// debug info of the request in stage 1
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_wid_st1, debug_pc_st1} = tag_st1[0][`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_wid_st1, debug_pc_st1} = 0;
end
`endif
// operation decode (do_read_st0 drives MSHR allocation; the *_st1 strobes drive
// data access, core responses and memory requests)
wire do_read_st0 = valid_st0 && is_read_st0;
wire do_read_st1 = valid_st1 && is_read_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_write_st1 = valid_st1 && is_write_st1;
wire do_mshr_st1 = valid_st1 && is_mshr_st1;
// core write data occupies the low NUM_PORTS words of the line-wide data field
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data_st1 = wdata_st1[0 +: NUM_PORTS * `WORD_WIDTH];
`UNUSED_VAR (wdata_st1)
// data store access:
// - read for core reads and MSHR dequeues
// - fill with the full line from memory
// - write only when the write hit (misses are not written into the data store)
VX_data_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE(CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (NUM_PORTS),
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE)
) data_access (
.clk (clk),
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st1),
.debug_wid (debug_wid_st1),
`endif
.stall (crsq_stall),
.read (do_read_st1 || do_mshr_st1),
.fill (do_fill_st1),
.write (do_write_st1 && !miss_st1),
.addr (addr_st1),
.wsel (wsel_st1),
.pmask (pmask_st1),
.byteen (byteen_st1),
.fill_data (wdata_st1),
.write_data (creq_data_st1),
.read_data (rdata_st1)
);
// MSHR control strobes (all suppressed while the pipeline is stalled):
// - allocate: every core read in stage 0 allocates an entry, before hit/miss is registered
// - replay:   a fill in stage 0 is passed to miss_resrv as lookup_replay
// - lookup:   performed together with allocation
// - release:  a core read that hit in stage 1 releases the entry in mshr_id_st1
wire mshr_allocate = do_read_st0 && !crsq_stall;
wire mshr_replay = do_fill_st0 && !crsq_stall;
wire mshr_lookup = mshr_allocate;
wire mshr_release = do_read_st1 && !miss_st1 && !crsq_stall;
// counts outstanding core reads: incremented when a core read is accepted,
// decremented on an MSHR dequeue or a release; its 'full' output (mshr_alm_full)
// blocks further core requests via creq_ready
VX_pending_size #(
.SIZE (MSHR_SIZE)
) mshr_pending_size (
.clk (clk),
.reset (reset),
.incr (creq_fire && ~creq_rw),
.decr (mshr_fire || mshr_release),
.full (mshr_alm_full),
`UNUSED_PIN (size),
`UNUSED_PIN (empty)
);
// miss reservation station: stores per-request info (wsel, tag, tid, pmask) for
// outstanding reads, maps a memory response id back to its line address
// (fill_id -> fill_addr), and presents entries for dequeue into the pipeline
VX_miss_resrv #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (NUM_PORTS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.MSHR_SIZE (MSHR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
) miss_resrv (
.clk (clk),
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.deq_debug_pc (debug_pc_sel),
.deq_debug_wid (debug_wid_sel),
.lkp_debug_pc (debug_pc_st0),
.lkp_debug_wid (debug_wid_st0),
.rel_debug_pc (debug_pc_st1),
.rel_debug_wid (debug_wid_st1),
`endif
// allocate
.allocate_valid (mshr_allocate),
.allocate_addr (addr_st0),
.allocate_data ({wsel_st0, tag_st0, req_tid_st0, pmask_st0}),
.allocate_id (mshr_alloc_id),
`UNUSED_PIN (allocate_ready),
// lookup
.lookup_valid (mshr_lookup),
.lookup_replay (mshr_replay),
.lookup_id (mshr_alloc_id),
.lookup_addr (addr_st0),
.lookup_match (mshr_pending_st0),
// fill
.fill_valid (mem_rsp_fire),
.fill_id (mem_rsp_id),
.fill_addr (mem_rsp_addr),
// dequeue
.dequeue_valid (mshr_valid),
.dequeue_id (mshr_dequeue_id),
.dequeue_addr (mshr_addr),
.dequeue_data ({mshr_wsel, mshr_tag, mshr_tid, mshr_pmask}),
.dequeue_ready (mshr_ready),
// release
.release_valid (mshr_release),
.release_id (mshr_id_st1)
);
// Enqueue core response
wire [NUM_PORTS-1:0] crsq_pmask;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] crsq_tag;
// a response is produced by a read hit or by an MSHR dequeue reaching stage 1
assign crsq_valid = (do_read_st1 && !miss_st1)
|| do_mshr_st1;
// stall the whole pipeline while a response is pending but cannot be accepted
assign crsq_stall = crsq_valid && !crsq_ready;
assign crsq_pmask = pmask_st1;
assign crsq_tid = req_tid_st1;
assign crsq_data = rdata_st1;
assign crsq_tag = tag_st1;
// core response queue; DATAW must equal the total width of the data_in/data_out concatenations
VX_elastic_buffer #(
.DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)),
.SIZE (CRSQ_SIZE),
.OUT_REG (1)
) core_rsp_req (
.clk (clk),
.reset (reset),
.valid_in (crsq_valid),
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
.ready_in (crsq_ready),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
.ready_out (core_rsp_ready)
);
// Enqueue memory request
wire mreq_push, mreq_pop, mreq_empty;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mreq_wsel;
wire [NUM_PORTS-1:0] mreq_pmask;
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
wire mreq_rw;
// a memory request is pushed for:
// - a read miss whose MSHR lookup reported no match (mshr_pending_st1 clear), or
// - every write reaching stage 1, hit or miss
// Both cases exclude crsq_valid, so a push cannot repeat while stage 1 is held by crsq_stall.
assign mreq_push = (do_read_st1 && miss_st1 && !mshr_pending_st1)
|| do_write_st1;
assign mreq_pop = mem_req_valid && mem_req_ready;
// only a writeable cache ever issues write requests to memory
assign mreq_rw = WRITE_ENABLE && is_write_st1;
assign mreq_addr = addr_st1;
assign mreq_id = mshr_id_st1;
assign mreq_pmask= pmask_st1;
assign mreq_wsel = wsel_st1;
assign mreq_byteen = byteen_st1;
assign mreq_data = creq_data_st1;
// memory request queue; its alm_full output blocks new core requests via creq_ready.
// NOTE(review): the MREQ_SIZE-2 threshold presumably leaves room for the requests
// already in the two pipeline stages - confirm against VX_fifo_queue.
VX_fifo_queue #(
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
.SIZE (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-2),
.OUT_REG (1 == NUM_BANKS)
) mem_req_queue (
.clk (clk),
.reset (reset),
.push (mreq_push),
.pop (mreq_pop),
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_pmask, mreq_byteen, mreq_wsel, mreq_data}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_pmask, mem_req_byteen, mem_req_wsel, mem_req_data}),
.empty (mreq_empty),
.alm_full (mreq_alm_full),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
);
assign mem_req_valid = !mreq_empty;
///////////////////////////////////////////////////////////////////////////////
// signals exported through the SCOPE macros (addresses are converted to byte addresses)
`SCOPE_ASSIGN (valid_st0, valid_st0);
`SCOPE_ASSIGN (valid_st1, valid_st1);
`SCOPE_ASSIGN (is_fill_st0, is_fill_st0);
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
`SCOPE_ASSIGN (miss_st0, miss_st0);
`SCOPE_ASSIGN (crsq_stall, crsq_stall);
`SCOPE_ASSIGN (mreq_alm_full, mreq_alm_full);
`SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full);
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`ifdef PERF_ENABLE
// performance event strobes: misses are sampled at stage 1, stalls reflect back-pressure sources
assign perf_read_misses = do_read_st1 && miss_st1;
assign perf_write_misses = do_write_st1 && miss_st1;
assign perf_pipe_stalls = crsq_stall || mreq_alm_full || mshr_alm_full;
assign perf_mshr_stalls = mshr_alm_full;
`endif
`ifdef DBG_PRINT_CACHE_BANK
// simulation trace of bank activity.
// NOTE(review): the trace calls below reference debug_wid_sel/debug_pc_sel and
// debug_wid_st1/debug_pc_st1, which are only declared under DBG_CACHE_REQ_INFO -
// confirm DBG_PRINT_CACHE_BANK is never defined without DBG_CACHE_REQ_INFO.
wire crsq_fire = crsq_valid && crsq_ready;
// some source is valid but none was accepted this cycle
wire pipeline_stall = (mshr_valid || mem_rsp_valid || creq_valid)
&& ~(mshr_fire || mem_rsp_fire || creq_fire);
always @(posedge clk) begin
if (pipeline_stall) begin
dpi_trace("%d: *** cache%0d:%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, CACHE_ID, BANK_ID, crsq_stall, mreq_alm_full, mshr_alm_full);
end
if (flush_enable) begin
dpi_trace("%d: cache%0d:%0d flush: addr=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
end
if (mem_rsp_fire) begin
dpi_trace("%d: cache%0d:%0d fill-rsp: addr=%0h, id=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data);
end
if (mshr_fire) begin
dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, debug_wid_sel, debug_pc_sel);
end
if (creq_fire) begin
if (creq_rw)
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel);
else
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel);
end
if (crsq_fire) begin
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
end
if (mreq_push) begin
if (is_write_st1)
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1);
else
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, debug_wid_st1, debug_pc_st1);
end
end
`endif
endmodule

585
hw/rtl/cache/VX_bank.v vendored
View File

@@ -1,585 +0,0 @@
`include "VX_cache_define.vh"
module VX_bank #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Size of cache in bytes
parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of bankS
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Core Request Queue Size
parameter CREQ_SIZE = 1,
// Core Response Queue Size
parameter CRSQ_SIZE = 1,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 1,
// Memory Request Queue Size
parameter MREQ_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0
) (
`SCOPE_IO_VX_bank
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output wire perf_read_misses,
output wire perf_write_misses,
output wire perf_mshr_stalls,
output wire perf_pipe_stalls,
`endif
// Core Request
input wire core_req_valid,
input wire [NUM_PORTS-1:0] core_req_pmask,
input wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel,
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_req_tid,
input wire core_req_rw,
input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr,
input wire [CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
// Core Response
output wire core_rsp_valid,
output wire [NUM_PORTS-1:0] core_rsp_pmask,
output wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_rsp_tid,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [CACHE_LINE_SIZE-1:0] mem_req_byteen,
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CACHE_LINE_WIDTH-1:0] mem_req_data,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr,
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
output wire mem_rsp_ready,
// flush
input wire flush_enable,
input wire [`LINE_SELECT_BITS-1:0] flush_addr
);
`UNUSED_PARAM (CORE_TAG_ID_BITS)
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
wire [31:0] debug_pc_sel, debug_pc_st0, debug_pc_st1;
wire [`NW_BITS-1:0] debug_wid_sel, debug_wid_st0, debug_wid_st1;
`IGNORE_WARNINGS_END
`endif
wire [NUM_PORTS-1:0] creq_pmask;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] creq_wsel;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] creq_tid;
wire creq_rw;
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
wire [CORE_TAG_WIDTH-1:0] creq_tag;
wire creq_out_valid, creq_out_ready;
VX_elastic_buffer #(
.DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.SIZE (CREQ_SIZE),
.OUTPUT_REG (CREQ_SIZE > 2)
) core_req_queue (
.clk (clk),
.reset (reset),
.ready_in (core_req_ready),
.valid_in (core_req_valid),
.data_in ({core_req_tag, core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid}),
.data_out ({creq_tag, creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid}),
.ready_out (creq_out_ready),
.valid_out (creq_out_valid)
);
wire mshr_alm_full;
wire mshr_pop;
wire mshr_valid;
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr;
wire [CORE_TAG_WIDTH-1:0] mshr_tag;
wire [NUM_PORTS-1:0] mshr_pmask;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] mshr_tid;
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
wire mem_rw_st0, mem_rw_st1;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
wire [`CACHE_LINE_WIDTH-1:0] rdata_st1;
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
wire [CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire valid_st0, valid_st1;
wire is_fill_st0, is_fill_st1;
wire is_mshr_st0, is_mshr_st1;
wire miss_st0, miss_st1;
wire prev_miss_dep_st0;
wire force_miss_st0, force_miss_st1;
wire not_same_prev_mshr_st0, not_same_prev_mshr_st1;
wire writeen_unqual_st0, writeen_unqual_st1;
wire incoming_fill_unqual_st0, incoming_fill_unqual_st1;
wire mshr_pending_st0;
wire is_flush_st0;
wire crsq_in_valid, crsq_in_ready, crsq_in_stall;
wire mreq_alm_full;
wire creq_out_fire = creq_out_valid && creq_out_ready;
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
VX_pending_size #(
.SIZE (MSHR_SIZE)
) mshr_pending_size (
.clk (clk),
.reset (reset),
.push (creq_out_fire && !creq_rw),
.pop (crsq_in_fire),
.full (mshr_alm_full),
`UNUSED_PIN (empty),
`UNUSED_PIN (size)
);
// determine which queue to pop next in priority order
wire mshr_grant = !mreq_alm_full; // ensure memory request queue not full (deadlock prevention)
wire mshr_enable = mshr_grant && mshr_valid;
wire mrsq_grant = !mshr_enable;
wire mrsq_enable = mrsq_grant && mem_rsp_valid;
wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable;
wire is_miss_st1 = (miss_st1 || force_miss_st1);
assign mshr_pop = mshr_enable
&& !(valid_st1 && is_mshr_st1 && is_miss_st1) // do not schedule another mshr request if the previous one missed
&& !crsq_in_stall; // ensure core response ready
assign creq_out_ready = creq_grant
&& !mreq_alm_full // ensure memory request ready
&& !mshr_alm_full // ensure mshr enqueue ready
&& !crsq_in_stall; // ensure core response ready
assign mem_rsp_ready = mrsq_grant
&& !crsq_in_stall; // ensure core response ready
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[`CACHE_REQ_INFO_RNG] : creq_tag[`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_wid_sel, debug_pc_sel} = 0;
end
`endif
wire [`CACHE_LINE_WIDTH-1:0] creq_line_data;
if (`WORDS_PER_LINE > 1) begin
if (NUM_PORTS > 1) begin
reg [`CACHE_LINE_WIDTH-1:0] creq_line_data_r;
always @(*) begin
creq_line_data_r = 'x;
for (integer p = 0; p < NUM_PORTS; p++) begin
if (creq_pmask[p]) begin
creq_line_data_r[creq_wsel[p] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data[p];
end
end
end
assign creq_line_data = creq_line_data_r;
end else begin
assign creq_line_data = {`WORDS_PER_LINE{creq_data}};
end
end else begin
assign creq_line_data = creq_data;
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + 1),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (!crsq_in_stall),
.data_in ({
flush_enable || mshr_pop || mem_rsp_fire || creq_out_fire,
flush_enable,
mshr_enable,
mrsq_enable || flush_enable,
mshr_enable ? 1'b0 : creq_rw,
mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
(mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data : creq_line_data,
mshr_enable ? mshr_wsel : creq_wsel,
creq_byteen,
mshr_enable ? mshr_tid : creq_tid,
mshr_enable ? mshr_pmask : creq_pmask,
mshr_enable ? mshr_tag : creq_tag
}),
.data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0})
);
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_wid_st0, debug_pc_st0} = tag_st0[`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_wid_st0, debug_pc_st0} = 0;
end
`endif
wire do_lookup_st0 = valid_st0 && ~is_fill_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0 && !crsq_in_stall;
wire tag_match_st0;
VX_tag_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
) tag_access (
.clk (clk),
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st0),
.debug_wid (debug_wid_st0),
`endif
// read/Fill
.lookup (do_lookup_st0),
.addr (addr_st0),
.fill (do_fill_st0),
.is_flush (is_flush_st0),
.tag_match (tag_match_st0)
);
// we had a miss with prior request for the current address
assign prev_miss_dep_st0 = valid_st1 && is_miss_st1 && (addr_st0 == addr_st1);
// we have a core request hit
assign miss_st0 = !is_fill_st0 && !tag_match_st0;
// force a miss to ensure commit order when a new request has pending previous requests to same block
// also force a miss for mshr requests when previous request was a missed
assign force_miss_st0 = (!is_fill_st0 && !is_mshr_st0 && (mshr_pending_st0 || prev_miss_dep_st0))
|| (is_mshr_st0 && valid_st1 && is_mshr_st1 && is_miss_st1);
// previous mshr request doesn't have same address
assign not_same_prev_mshr_st0 = valid_st1 && is_mshr_st1 && (addr_st1 != addr_st0);
// enable write when we have a fill request that is not redundant
assign writeen_unqual_st0 = is_fill_st0 && !tag_match_st0;
// check if incoming memory response match current address
assign incoming_fill_unqual_st0 = mem_rsp_valid && (addr_st0 == mem_rsp_addr);
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (!crsq_in_stall),
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, incoming_fill_unqual_st0, miss_st0, force_miss_st0, mem_rw_st0, not_same_prev_mshr_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}),
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, incoming_fill_unqual_st1, miss_st1, force_miss_st1, mem_rw_st1, not_same_prev_mshr_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1})
);
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_wid_st1, debug_pc_st1} = tag_st1[`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_wid_st1, debug_pc_st1} = 0;
end
`endif
wire writeen_st1 = (WRITE_ENABLE && !is_fill_st1 && mem_rw_st1 && ~is_miss_st1)
|| writeen_unqual_st1;
wire readen_st1 = !is_fill_st1 && !mem_rw_st1;
wire crsq_push_st1 = readen_st1 && ~is_miss_st1;
wire mshr_push_st1 = readen_st1 && is_miss_st1;
wire incoming_fill_st1 = (mem_rsp_valid && (addr_st1 == mem_rsp_addr))
|| incoming_fill_unqual_st1;
wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1;
wire mreq_push_st1 = (readen_st1 && miss_st1 && (~force_miss_st1 || not_same_prev_mshr_st1) && !incoming_fill_st1)
|| do_writeback_st1;
wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] line_byteen_st1;
if (`WORDS_PER_LINE > 1) begin
reg [CACHE_LINE_SIZE-1:0] line_byteen_r;
always @(*) begin
line_byteen_r = 0;
for (integer p = 0; p < NUM_PORTS; p++) begin
if ((NUM_PORTS == 1) || pmask_st1[p]) begin
line_byteen_r[wsel_st1[p] * WORD_SIZE +: WORD_SIZE] = byteen_st1[p];
end
end
end
assign line_byteen_st1 = line_byteen_r;
end else begin
assign line_byteen_st1 = byteen_st1;
end
VX_data_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE(CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE)
) data_access (
.clk (clk),
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st1),
.debug_wid (debug_wid_st1),
`endif
.addr (addr_st1),
// reading
.readen (valid_st1 && readen_st1),
.rdata (rdata_st1),
// writing
.writeen (valid_st1 && writeen_st1),
.is_fill (is_fill_st1),
.byteen (line_byteen_st1),
.wdata (wdata_st1)
);
wire mshr_push = valid_st1 && mshr_push_st1;
wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1 && crsq_in_ready;
wire mshr_restore = is_mshr_st1;
// push a missed request as 'ready' if it was a forced miss that actually had a hit
// or the fill request for this block is comming
wire mshr_init_ready_state = !miss_st1 || incoming_fill_unqual_st1;
VX_miss_resrv #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (NUM_PORTS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.MSHR_SIZE (MSHR_SIZE),
.ALM_FULL (MSHR_SIZE-2),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
) miss_resrv (
.clk (clk),
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.deq_debug_pc (debug_pc_sel),
.deq_debug_wid (debug_wid_sel),
.enq_debug_pc (debug_pc_st1),
.enq_debug_wid (debug_wid_st1),
`endif
// enqueue
.enqueue (mshr_push),
.enqueue_addr (addr_st1),
.enqueue_data ({wsel_st1, tag_st1, req_tid_st1, pmask_st1}),
.enqueue_is_mshr (mshr_restore),
.enqueue_as_ready (mshr_init_ready_state),
`UNUSED_PIN (enqueue_almfull),
`UNUSED_PIN (enqueue_full),
// fill
.fill_start (mem_rsp_fire),
.fill_addr (mem_rsp_addr),
// lookup
.lookup_addr (addr_st0),
.lookup_match (mshr_pending_st0),
.lookup_fill (do_fill_st0),
// schedule
.schedule (mshr_pop),
.schedule_valid (mshr_valid),
.schedule_addr (mshr_addr),
.schedule_data ({mshr_wsel, mshr_tag, mshr_tid, mshr_pmask}),
// dequeue
.dequeue (mshr_dequeue)
);
// Enqueue core response
wire [NUM_PORTS-1:0] crsq_pmask;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
wire [CORE_TAG_WIDTH-1:0] crsq_tag;
assign crsq_in_valid = valid_st1 && crsq_push_st1;
assign crsq_in_stall = crsq_in_valid && !crsq_in_ready;
assign crsq_pmask = pmask_st1;
assign crsq_tid = req_tid_st1;
assign crsq_tag = tag_st1;
if (`WORDS_PER_LINE > 1) begin
for (genvar p = 0; p < NUM_PORTS; ++p) begin
assign crsq_data[p] = rdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH];
end
end else begin
assign crsq_data = rdata_st1;
end
VX_elastic_buffer #(
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.SIZE (CRSQ_SIZE),
.OUTPUT_REG (1 == NUM_BANKS)
) core_rsp_req (
.clk (clk),
.reset (reset),
.valid_in (crsq_in_valid),
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
.ready_in (crsq_in_ready),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
.ready_out (core_rsp_ready)
);
// Memory request path: signals that feed the memory request queue
wire mreq_push;
wire mreq_pop;
wire mreq_empty;
wire mreq_rw;
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
wire [CACHE_LINE_SIZE-1:0] mreq_byteen;
wire [`CACHE_LINE_WIDTH-1:0] mreq_data;

// push when the st1 entry is valid and flagged for a memory request;
// pop when the downstream memory request handshake completes
assign mreq_push = mreq_push_st1 && valid_st1;
assign mreq_pop = mem_req_ready && mem_req_valid;

// the request is a write only for writebacks, and only if writes are enabled
assign mreq_rw = do_writeback_st1 && (WRITE_ENABLE != 0);

// address and data come straight from st1
assign mreq_addr = addr_st1;
assign mreq_data = wdata_st1;

// writes use the line byte-enable mask; reads enable every byte of the line
assign mreq_byteen = !mreq_rw ? {CACHE_LINE_SIZE{1'b1}} : line_byteen_st1;
// Memory request queue: buffers {rw, byteen, addr, data} between the pipeline
// (mreq_* signals) and the module's mem_req_* outputs.
VX_fifo_queue #(
// payload = rw bit + line byte enables + line address + line data
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (MREQ_SIZE),
// almost-full threshold passed to the queue
// NOTE(review): evaluates to a non-positive value when MREQ_SIZE < 3 - confirm
// how VX_fifo_queue treats that
.ALM_FULL (MREQ_SIZE-2)
) mem_req_queue (
.clk (clk),
.reset (reset),
.push (mreq_push),
.pop (mreq_pop),
// field order here must match the order used on data_out below
.data_in ({mreq_rw, mreq_byteen, mreq_addr, mreq_data}),
.data_out ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data}),
.empty (mreq_empty),
.alm_full (mreq_alm_full),
// remaining status outputs are not used by this module
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
);
// a memory request is offered whenever the queue holds at least one entry
assign mem_req_valid = !mreq_empty;
// Signals handed to the SCOPE_ASSIGN macro (defined outside this file;
// presumably exports them to the scope/debug tap - TODO confirm).
// Pipeline state flags:
`SCOPE_ASSIGN (valid_st0, valid_st0);
`SCOPE_ASSIGN (valid_st1, valid_st1);
`SCOPE_ASSIGN (is_fill_st0, is_fill_st0);
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
`SCOPE_ASSIGN (miss_st0, miss_st0);
`SCOPE_ASSIGN (force_miss_st0, force_miss_st0);
`SCOPE_ASSIGN (mshr_push, mshr_push);
// Back-pressure sources (the same three terms that form perf_pipe_stalls):
`SCOPE_ASSIGN (crsq_in_stall, crsq_in_stall);
`SCOPE_ASSIGN (mreq_alm_full, mreq_alm_full);
`SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full);
// Line addresses are converted with LINE_TO_BYTE_ADDR using this bank's id
// before being exported.
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`ifdef PERF_ENABLE
// Performance event flags (combinational).
// A miss is reported only for a valid st1 entry that is neither a fill
// (is_fill_st1) nor an MSHR entry (is_mshr_st1); mem_rw_st1 splits the
// event into read and write misses.
assign perf_read_misses  = valid_st1 && miss_st1 && !mem_rw_st1 && !is_fill_st1 && !is_mshr_st1;
assign perf_write_misses = valid_st1 && miss_st1 && mem_rw_st1 && !is_fill_st1 && !is_mshr_st1;
// MSHR stall: the MSHR almost-full flag on its own
assign perf_mshr_stalls = mshr_alm_full;
// pipeline stall: any of the three back-pressure sources is active
assign perf_pipe_stalls = mshr_alm_full || mreq_alm_full || crsq_in_stall;
`endif
`ifdef DBG_PRINT_CACHE_BANK
// Simulation-only trace: on every rising clock edge, prints one line per
// event that is active in this bank. Compiled in only when
// DBG_PRINT_CACHE_BANK is defined.
// NOTE(review): several messages reference debug_wid_* / debug_pc_* signals,
// which the miss_resrv instance connects under DBG_CACHE_REQ_INFO - confirm
// they are declared when only DBG_PRINT_CACHE_BANK is defined.
always @(posedge clk) begin
// (disabled) report of responses that use more than one port
/*if (crsq_in_fire && (NUM_PORTS > 1) && $countones(crsq_pmask) > 1) begin
$display("%t: *** cache%0d:%0d multi-port-out: pmask=%b, addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, crsq_pmask, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag);
end*/
// non-fill miss in st1 while incoming_fill_st1 is set; this is asserted
// never to happen for an MSHR entry
if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_st1) begin
$display("%t: *** cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
assert(!is_mshr_st1);
end
// any back-pressure source active (same condition as perf_pipe_stalls)
if (crsq_in_stall || mreq_alm_full || mshr_alm_full) begin
$display("%t: *** cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_in_stall, mreq_alm_full, mshr_alm_full);
end
// flush in progress
if (flush_enable) begin
$display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
end
// memory (fill) response accepted
if (mem_rsp_fire) begin
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_data);
end
// entry scheduled out of the MSHR
if (mshr_pop) begin
$display("%t: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, debug_wid_sel, debug_pc_sel);
end
// core request leaving the request queue: write or read variant
if (creq_out_fire) begin
if (creq_rw)
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel);
else
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel);
end
// core response pushed into the response buffer
if (crsq_in_fire) begin
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
end
// memory request pushed: writeback when do_writeback_st1 is set, otherwise a fill request
if (mreq_push) begin
if (do_writeback_st1)
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1);
else
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), debug_wid_st1, debug_pc_st1);
end
end
`endif
endmodule

View File

@@ -18,15 +18,15 @@ module VX_cache #(
parameter WORD_SIZE = 4,
// Core Request Queue Size
parameter CREQ_SIZE = 2,
parameter CREQ_SIZE = 0,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 4,
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 2,
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
@@ -44,13 +44,15 @@ module VX_cache #(
parameter BANK_ADDR_OFFSET = 0,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0
parameter NC_ENABLE = 0,
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
`SCOPE_IO_VX_cache
// PERF
`ifdef PERF_ENABLE
VX_perf_cache_if perf_cache_if,
VX_perf_cache_if.master perf_cache_if,
`endif
input wire clk,
@@ -91,6 +93,8 @@ module VX_cache #(
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
`STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value"))
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE);
localparam MEM_TAG_IN_WIDTH = `BANK_SELECT_BITS + MSHR_ADDR_WIDTH;
localparam CORE_TAG_X_WIDTH = CORE_TAG_WIDTH - NC_ENABLE;
localparam CORE_TAG_ID_X_BITS = (CORE_TAG_ID_BITS != 0) ? (CORE_TAG_ID_BITS - NC_ENABLE) : CORE_TAG_ID_BITS;
@@ -103,6 +107,117 @@ module VX_cache #(
///////////////////////////////////////////////////////////////////////////
wire mem_req_valid_sb;
wire mem_req_rw_sb;
wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_sb;
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_sb;
wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_sb;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_sb;
wire mem_req_ready_sb;
VX_skid_buffer #(
.DATAW (1+CACHE_LINE_SIZE+`MEM_ADDR_WIDTH+`CACHE_LINE_WIDTH+MEM_TAG_WIDTH),
.PASSTHRU (1 == NUM_BANKS)
) mem_req_sbuf (
.clk (clk),
.reset (reset),
.valid_in (mem_req_valid_sb),
.ready_in (mem_req_ready_sb),
.data_in ({mem_req_rw_sb, mem_req_byteen_sb, mem_req_addr_sb, mem_req_data_sb, mem_req_tag_sb}),
.data_out ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag}),
.valid_out (mem_req_valid),
.ready_out (mem_req_ready)
);
///////////////////////////////////////////////////////////////////////////
wire [`CORE_RSP_TAGS-1:0] core_rsp_valid_sb;
wire [NUM_REQS-1:0] core_rsp_tmask_sb;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_sb;
wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_sb;
wire [`CORE_RSP_TAGS-1:0] core_rsp_ready_sb;
if (CORE_TAG_ID_BITS != 0) begin
VX_skid_buffer #(
.DATAW (NUM_REQS + NUM_REQS*`WORD_WIDTH + CORE_TAG_WIDTH),
.PASSTHRU (1 == NUM_BANKS)
) core_rsp_sbuf (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_sb),
.ready_in (core_rsp_ready_sb),
.data_in ({core_rsp_tmask_sb, core_rsp_data_sb, core_rsp_tag_sb}),
.data_out ({core_rsp_tmask, core_rsp_data, core_rsp_tag}),
.valid_out (core_rsp_valid),
.ready_out (core_rsp_ready)
);
end else begin
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (1 + `WORD_WIDTH + CORE_TAG_WIDTH),
.PASSTHRU (1 == NUM_BANKS)
) core_rsp_sbuf (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_sb[i]),
.ready_in (core_rsp_ready_sb[i]),
.data_in ({core_rsp_tmask_sb[i], core_rsp_data_sb[i], core_rsp_tag_sb[i]}),
.data_out ({core_rsp_tmask[i], core_rsp_data[i], core_rsp_tag[i]}),
.valid_out (core_rsp_valid[i]),
.ready_out (core_rsp_ready[i])
);
end
end
///////////////////////////////////////////////////////////////////////////
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_p;
wire [NUM_PORTS-1:0] mem_req_pmask_p;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_p;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_p;
wire mem_req_rw_p;
if (WRITE_ENABLE) begin
if (`WORDS_PER_LINE > 1) begin
reg [CACHE_LINE_SIZE-1:0] mem_req_byteen_r;
reg [`CACHE_LINE_WIDTH-1:0] mem_req_data_r;
always @(*) begin
mem_req_byteen_r = 0;
mem_req_data_r = 'x;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if ((1 == NUM_PORTS) || mem_req_pmask_p[i]) begin
mem_req_byteen_r[mem_req_wsel_p[i] * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p[i];
mem_req_data_r[mem_req_wsel_p[i] * `WORD_WIDTH +: `WORD_WIDTH] = mem_req_data_p[i];
end
end
end
assign mem_req_rw_sb = mem_req_rw_p;
assign mem_req_byteen_sb = mem_req_byteen_r;
assign mem_req_data_sb = mem_req_data_r;
end else begin
`UNUSED_VAR (mem_req_pmask_p)
`UNUSED_VAR (mem_req_wsel_p)
assign mem_req_rw_sb = mem_req_rw_p;
assign mem_req_byteen_sb = mem_req_byteen_p;
assign mem_req_data_sb = mem_req_data_p;
end
end else begin
`UNUSED_VAR (mem_req_byteen_p)
`UNUSED_VAR (mem_req_pmask_p)
`UNUSED_VAR (mem_req_wsel_p)
`UNUSED_VAR (mem_req_data_p)
`UNUSED_VAR (mem_req_rw_p)
assign mem_req_rw_sb = 0;
assign mem_req_byteen_sb = 'x;
assign mem_req_data_sb = 'x;
end
///////////////////////////////////////////////////////////////////////////
// Core request
wire [NUM_REQS-1:0] core_req_valid_nc;
wire [NUM_REQS-1:0] core_req_rw_nc;
@@ -122,20 +237,23 @@ module VX_cache #(
// Memory request
wire mem_req_valid_nc;
wire mem_req_rw_nc;
wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_nc;
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc;
wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_nc;
wire [`MEM_ADDR_WIDTH-1:0] mem_req_tag_nc;
wire [NUM_PORTS-1:0] mem_req_pmask_nc;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_nc;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_nc;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_nc;
wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_nc;
wire mem_req_ready_nc;
// Memory response
wire mem_rsp_valid_nc;
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_nc;
wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_nc;
wire mem_rsp_ready_nc;
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_nc;
wire mem_rsp_ready_nc;
if (NC_ENABLE) begin
VX_nc_bypass #(
.NUM_PORTS (NUM_PORTS),
.NUM_REQS (NUM_REQS),
.NUM_RSP_TAGS (`CORE_RSP_TAGS),
.NC_TAG_BIT (0),
@@ -145,12 +263,12 @@ module VX_cache #(
.CORE_TAG_IN_WIDTH (CORE_TAG_WIDTH),
.MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH),
.MEM_DATA_SIZE (CACHE_LINE_SIZE),
.MEM_TAG_IN_WIDTH (`MEM_ADDR_WIDTH),
.MEM_DATA_SIZE (CACHE_LINE_SIZE),
.MEM_TAG_IN_WIDTH (MEM_TAG_IN_WIDTH),
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH)
) nc_bypass (
.clk (clk),
.reset (reset),
.clk (clk),
.reset (reset),
// Core request in
.core_req_valid_in (core_req_valid),
@@ -178,29 +296,33 @@ module VX_cache #(
.core_rsp_ready_in (core_rsp_ready_nc),
// Core response out
.core_rsp_valid_out (core_rsp_valid),
.core_rsp_tmask_out (core_rsp_tmask),
.core_rsp_data_out (core_rsp_data),
.core_rsp_tag_out (core_rsp_tag),
.core_rsp_ready_out (core_rsp_ready),
.core_rsp_valid_out (core_rsp_valid_sb),
.core_rsp_tmask_out (core_rsp_tmask_sb),
.core_rsp_data_out (core_rsp_data_sb),
.core_rsp_tag_out (core_rsp_tag_sb),
.core_rsp_ready_out (core_rsp_ready_sb),
// Memory request in
.mem_req_valid_in (mem_req_valid_nc),
.mem_req_rw_in (mem_req_rw_nc),
.mem_req_byteen_in (mem_req_byteen_nc),
.mem_req_rw_in (mem_req_rw_nc),
.mem_req_addr_in (mem_req_addr_nc),
.mem_req_pmask_in (mem_req_pmask_nc),
.mem_req_byteen_in (mem_req_byteen_nc),
.mem_req_wsel_in (mem_req_wsel_nc),
.mem_req_data_in (mem_req_data_nc),
.mem_req_tag_in (mem_req_tag_nc),
.mem_req_ready_in (mem_req_ready_nc),
// Memory request out
.mem_req_valid_out (mem_req_valid),
.mem_req_rw_out (mem_req_rw),
.mem_req_byteen_out (mem_req_byteen),
.mem_req_addr_out (mem_req_addr),
.mem_req_data_out (mem_req_data),
.mem_req_tag_out (mem_req_tag),
.mem_req_ready_out (mem_req_ready),
.mem_req_valid_out (mem_req_valid_sb),
.mem_req_addr_out (mem_req_addr_sb),
.mem_req_rw_out (mem_req_rw_p),
.mem_req_pmask_out (mem_req_pmask_p),
.mem_req_byteen_out (mem_req_byteen_p),
.mem_req_wsel_out (mem_req_wsel_p),
.mem_req_data_out (mem_req_data_p),
.mem_req_tag_out (mem_req_tag_sb),
.mem_req_ready_out (mem_req_ready_sb),
// Memory response in
.mem_rsp_valid_in (mem_rsp_valid),
@@ -223,19 +345,21 @@ module VX_cache #(
assign core_req_tag_nc = core_req_tag;
assign core_req_ready = core_req_ready_nc;
assign core_rsp_valid = core_rsp_valid_nc;
assign core_rsp_tmask = core_rsp_tmask_nc;
assign core_rsp_data = core_rsp_data_nc;
assign core_rsp_tag = core_rsp_tag_nc;
assign core_rsp_ready_nc = core_rsp_ready;
assign core_rsp_valid_sb = core_rsp_valid_nc;
assign core_rsp_tmask_sb = core_rsp_tmask_nc;
assign core_rsp_data_sb = core_rsp_data_nc;
assign core_rsp_tag_sb = core_rsp_tag_nc;
assign core_rsp_ready_nc = core_rsp_ready_sb;
assign mem_req_valid = mem_req_valid_nc;
assign mem_req_rw = mem_req_rw_nc;
assign mem_req_addr = mem_req_addr_nc;
assign mem_req_byteen = mem_req_byteen_nc;
assign mem_req_data = mem_req_data_nc;
assign mem_req_tag = mem_req_tag_nc;
assign mem_req_ready_nc = mem_req_ready;
assign mem_req_valid_sb = mem_req_valid_nc;
assign mem_req_addr_sb = mem_req_addr_nc;
assign mem_req_rw_p = mem_req_rw_nc;
assign mem_req_pmask_p = mem_req_pmask_nc;
assign mem_req_byteen_p = mem_req_byteen_nc;
assign mem_req_wsel_p = mem_req_wsel_nc;
assign mem_req_data_p = mem_req_data_nc;
assign mem_req_tag_sb = mem_req_tag_nc;
assign mem_req_ready_nc = mem_req_ready_sb;
assign mem_rsp_valid_nc = mem_rsp_valid;
assign mem_rsp_data_nc = mem_rsp_data;
@@ -246,17 +370,19 @@ module VX_cache #(
///////////////////////////////////////////////////////////////////////////
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual;
wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_qual;
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_qual;
wire mrsq_out_valid, mrsq_out_ready;
`RESET_RELAY (mrsq_reset);
VX_elastic_buffer #(
.DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (MRSQ_SIZE),
.OUTPUT_REG (MRSQ_SIZE > 2)
.DATAW (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (MRSQ_SIZE),
.OUT_REG (MRSQ_SIZE > 2)
) mem_rsp_queue (
.clk (clk),
.reset (reset),
.reset (mrsq_reset),
.ready_in (mem_rsp_ready_nc),
.valid_in (mem_rsp_valid_nc),
.data_in ({mem_rsp_tag_nc, mem_rsp_data_nc}),
@@ -272,13 +398,15 @@ module VX_cache #(
wire [`LINE_SELECT_BITS-1:0] flush_addr;
wire flush_enable;
`RESET_RELAY (flush_reset);
VX_flush_ctrl #(
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS)
) flush_ctrl (
.clk (clk),
.reset (reset),
.reset (flush_reset),
.addr_out (flush_addr),
.valid_out (flush_enable)
);
@@ -287,36 +415,38 @@ module VX_cache #(
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_core_req_wsel;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid;
wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
wire [NUM_BANKS-1:0][CACHE_LINE_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_mem_req_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_mem_req_wsel;
wire [NUM_BANKS-1:0][`MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
wire [NUM_BANKS-1:0][`CACHE_LINE_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
if (NUM_BANKS == 1) begin
`UNUSED_VAR (mem_rsp_tag_qual)
assign mrsq_out_ready = per_bank_mem_rsp_ready;
end else begin
assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)];
assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual)];
end
VX_core_req_bank_sel #(
@@ -358,31 +488,34 @@ module VX_cache #(
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire curr_bank_core_req_valid;
wire [NUM_PORTS-1:0] curr_bank_core_req_pmask;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] curr_bank_core_req_wsel;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_core_req_wsel;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid;
wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag;
wire curr_bank_core_req_rw;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire curr_bank_core_req_ready;
wire curr_bank_core_rsp_valid;
wire [NUM_PORTS-1:0] curr_bank_core_rsp_pmask;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag;
wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready;
wire curr_bank_mem_req_valid;
wire curr_bank_mem_req_rw;
wire [CACHE_LINE_SIZE-1:0] curr_bank_mem_req_byteen;
wire [NUM_PORTS-1:0] curr_bank_mem_req_pmask;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_mem_req_byteen;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_mem_req_wsel;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
wire[`CACHE_LINE_WIDTH-1:0] curr_bank_mem_req_data;
wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_req_id;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_mem_req_data;
wire curr_bank_mem_req_ready;
wire curr_bank_mem_rsp_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_rsp_addr;
wire curr_bank_mem_rsp_valid;
wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_rsp_id;
wire [`CACHE_LINE_WIDTH-1:0] curr_bank_mem_rsp_data;
wire curr_bank_mem_rsp_ready;
@@ -407,27 +540,31 @@ module VX_cache #(
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
// Memory request
assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid;
assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw;
assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid;
assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw;
assign per_bank_mem_req_pmask[i] = curr_bank_mem_req_pmask;
assign per_bank_mem_req_byteen[i] = curr_bank_mem_req_byteen;
assign per_bank_mem_req_wsel[i] = curr_bank_mem_req_wsel;
if (NUM_BANKS == 1) begin
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
end else begin
assign per_bank_mem_req_addr[i] = `LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i);
end
assign per_bank_mem_req_id[i] = curr_bank_mem_req_id;
assign per_bank_mem_req_data[i] = curr_bank_mem_req_data;
assign curr_bank_mem_req_ready = per_bank_mem_req_ready[i];
assign curr_bank_mem_req_ready = per_bank_mem_req_ready[i];
// Memory response
if (NUM_BANKS == 1) begin
assign curr_bank_mem_rsp_valid = mrsq_out_valid;
assign curr_bank_mem_rsp_addr = mem_rsp_tag_qual;
assign curr_bank_mem_rsp_valid = mrsq_out_valid;
end else begin
assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_ADDR_BANK(mem_rsp_tag_qual) == i);
assign curr_bank_mem_rsp_addr = `MEM_TO_LINE_ADDR(mem_rsp_tag_qual);
assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual) == i);
end
assign curr_bank_mem_rsp_id = `MEM_TAG_TO_REQ_ID(mem_rsp_tag_qual);
assign curr_bank_mem_rsp_data = mem_rsp_data_qual;
assign per_bank_mem_rsp_ready[i] = curr_bank_mem_rsp_ready;
`RESET_RELAY (bank_reset);
VX_bank #(
.BANK_ID (i),
@@ -450,7 +587,7 @@ module VX_cache #(
`SCOPE_BIND_VX_cache_bank(i)
.clk (clk),
.reset (reset),
.reset (bank_reset),
`ifdef PERF_ENABLE
.perf_read_misses (perf_read_miss_per_bank[i]),
@@ -482,14 +619,17 @@ module VX_cache #(
// Memory request
.mem_req_valid (curr_bank_mem_req_valid),
.mem_req_rw (curr_bank_mem_req_rw),
.mem_req_pmask (curr_bank_mem_req_pmask),
.mem_req_byteen (curr_bank_mem_req_byteen),
.mem_req_wsel (curr_bank_mem_req_wsel),
.mem_req_addr (curr_bank_mem_req_addr),
.mem_req_id (curr_bank_mem_req_id),
.mem_req_data (curr_bank_mem_req_data),
.mem_req_ready (curr_bank_mem_req_ready),
// Memory response
.mem_rsp_valid (curr_bank_mem_rsp_valid),
.mem_rsp_addr (curr_bank_mem_rsp_addr),
.mem_rsp_id (curr_bank_mem_rsp_id),
.mem_rsp_data (curr_bank_mem_rsp_data),
.mem_rsp_ready (curr_bank_mem_rsp_ready),
@@ -523,53 +663,66 @@ module VX_cache #(
.core_rsp_ready (core_rsp_ready_nc)
);
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i]};
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_pmask[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]};
end
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id;
`RESET_RELAY (mreq_reset);
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
.BUFFERED (1)
.DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
.TYPE ("R")
) mem_req_arb (
.clk (clk),
.reset (reset),
.reset (mreq_reset),
.valid_in (per_bank_mem_req_valid),
.data_in (data_in),
.ready_in (per_bank_mem_req_ready),
.valid_out (mem_req_valid_nc),
.data_out ({mem_req_addr_nc, mem_req_rw_nc, mem_req_byteen_nc, mem_req_data_nc}),
.data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_pmask_nc, mem_req_byteen_nc, mem_req_wsel_nc, mem_req_data_nc}),
.ready_out (mem_req_ready_nc)
);
assign mem_req_tag_nc = mem_req_addr_nc;
if (NUM_BANKS == 1) begin
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'(mem_req_id);
end else begin
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({`MEM_ADDR_TO_BANK_ID(mem_req_addr_nc), mem_req_id});
end
`ifdef PERF_ENABLE
// per cycle: core_reads, core_writes
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle;
reg [($clog2(NUM_REQS+1)-1):0] perf_core_writes_per_cycle;
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
if (CORE_TAG_ID_BITS != 0) begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end else begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
reg [($clog2(NUM_BANKS+1)-1):0] perf_read_miss_per_cycle;
reg [($clog2(NUM_BANKS+1)-1):0] perf_write_miss_per_cycle;
reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle;
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_pipe_stall_per_cycle;
assign perf_read_miss_per_cycle = $countones(perf_read_miss_per_bank);
assign perf_write_miss_per_cycle = $countones(perf_write_miss_per_bank);
assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank);
assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank);
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
`POP_COUNT(perf_pipe_stall_per_cycle, perf_pipe_stall_per_bank);
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
reg [`PERF_CTR_BITS-1:0] perf_core_writes;

View File

@@ -9,8 +9,10 @@
`define REQS_BITS `LOG2UP(NUM_REQS)
// tag valid tid word_sel
`define MSHR_DATA_WIDTH (CORE_TAG_WIDTH + (1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
`define PORTS_BITS `LOG2UP(NUM_PORTS)
// tag valid tid word_sel
`define MSHR_DATA_WIDTH ((CORE_TAG_WIDTH + 1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
`define WORD_WIDTH (8 * WORD_SIZE)
@@ -57,14 +59,14 @@
`define CORE_RSP_TAGS ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS)
`define BANK_READY_COUNT ((SHARED_BANK_READY != 0) ? 1 : NUM_BANKS)
`define MEM_ADDR_BANK(x) x[`BANK_SELECT_BITS+BANK_ADDR_OFFSET-1 : BANK_ADDR_OFFSET]
`define MEM_TO_LINE_ADDR(x) x[`MEM_ADDR_WIDTH-1 : `BANK_SELECT_BITS]
`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
`define MEM_ADDR_TO_BANK_ID(x) x[0 +: `BANK_SELECT_BITS]
`define MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
`define MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `BANK_SELECT_BITS]
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}

View File

@@ -16,9 +16,7 @@ module VX_core_req_bank_sel #(
// core request tag size
parameter CORE_TAG_WIDTH = 3,
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0,
// shared bank ready signal
parameter SHARED_BANK_READY = 0
parameter BANK_ADDR_OFFSET = 0
) (
input wire clk,
input wire reset,
@@ -43,8 +41,8 @@ module VX_core_req_bank_sel #(
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid,
output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
input wire [`BANK_READY_COUNT-1:0] per_bank_core_req_ready
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
input wire [NUM_BANKS-1:0] per_bank_core_req_ready
);
`UNUSED_PARAM (CACHE_ID)
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
@@ -80,9 +78,9 @@ module VX_core_req_bank_sel #(
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
reg [NUM_BANKS-1:0] per_bank_core_req_rw_r;
reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r;
reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
reg [NUM_REQS-1:0] core_req_ready_r;
if (NUM_REQS > 1) begin
@@ -101,7 +99,7 @@ module VX_core_req_bank_sel #(
end
end
for (genvar i = NUM_REQS-1; i >= 0; --i) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_line_match[i] = (core_req_line_addr[i] == per_bank_line_addr_r[core_req_bid[i]]);
end
@@ -129,30 +127,19 @@ module VX_core_req_bank_sel #(
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
req_select_table_r[core_req_bid[i]][i % NUM_PORTS] = (1 << i);
end
end
end
if (SHARED_BANK_READY == 0) begin
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
&& core_req_line_match[i]
&& req_select_table_r[core_req_bid[i]][i % NUM_PORTS][i];
end
end
end else begin
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_req_ready_r[i] = per_bank_core_req_ready
&& core_req_line_match[i]
&& req_select_table_r[core_req_bid[i]][i % NUM_PORTS][i];
end
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
&& core_req_line_match[i]
&& req_select_table_r[core_req_bid[i]][i % NUM_PORTS][i];
end
end
@@ -177,32 +164,17 @@ module VX_core_req_bank_sel #(
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
end
end
end
if (SHARED_BANK_READY == 0) begin
always @(*) begin
core_req_ready_r = 'x;
for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
&& core_req_line_match[i];
end
end
end
end else begin
always @(*) begin
core_req_ready_r = 'x;
for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
core_req_ready_r[i] = per_bank_core_req_ready
&& core_req_line_match[i];
end
end
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
&& core_req_line_match[i];
end
end
end
@@ -236,22 +208,11 @@ module VX_core_req_bank_sel #(
end
if (NUM_BANKS > 1) begin
if (SHARED_BANK_READY == 0) begin
always @(*) begin
core_req_ready_r = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_r[i]) begin
core_req_ready_r[per_bank_core_req_tid_r[i]] = per_bank_core_req_ready[i];
end
end
end
end else begin
always @(*) begin
core_req_ready_r = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_r[i]) begin
core_req_ready_r[per_bank_core_req_tid_r[i]] = per_bank_core_req_ready;
end
always @(*) begin
core_req_ready_r = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_r[i]) begin
core_req_ready_r[per_bank_core_req_tid_r[i]] = per_bank_core_req_ready[i];
end
end
end
@@ -320,33 +281,26 @@ module VX_core_req_bank_sel #(
`ifdef PERF_ENABLE
reg [NUM_REQS-1:0] core_req_sel_r;
if (SHARED_BANK_READY == 0) begin
always @(*) begin
core_req_sel_r = 0;
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid[i]) begin
core_req_sel_r[i] = per_bank_core_req_ready[core_req_bid[i]];
end
end
end
end else begin
always @(*) begin
core_req_sel_r = 0;
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid[i]) begin
core_req_sel_r[i] = per_bank_core_req_ready;
end
always @(*) begin
core_req_sel_r = 0;
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid[i]) begin
core_req_sel_r[i] = per_bank_core_req_ready[core_req_bid[i]];
end
end
end
reg [`PERF_CTR_BITS-1:0] bank_stalls_r;
wire [$clog2(NUM_REQS+1)-1:0] bank_stall_cnt;
wire [NUM_REQS-1:0] bank_stall_mask = core_req_sel_r & ~core_req_ready;
`POP_COUNT(bank_stall_cnt, bank_stall_mask);
always @(posedge clk) begin
if (reset) begin
bank_stalls_r <= 0;
end else begin
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'($countones(core_req_sel_r & ~core_req_ready));
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'(bank_stall_cnt);
end
end

350
hw/rtl/cache/VX_core_rsp_merge.sv vendored Normal file
View File

@@ -0,0 +1,350 @@
`include "VX_cache_define.vh"
// VX_core_rsp_merge: gathers the core responses produced by the cache banks
// (per_bank_core_rsp_*) and drives the core response bus (core_rsp_*).
// The implementation is chosen at elaboration time from NUM_BANKS,
// CORE_TAG_ID_BITS, NUM_PORTS and NUM_REQS.
module VX_core_rsp_merge #(
parameter CACHE_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// output register
parameter OUT_REG = 0
) (
input wire clk,
input wire reset,
// Per Bank WB
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
// Core Response
output wire [`CORE_RSP_TAGS-1:0] core_rsp_valid,
output wire [NUM_REQS-1:0] core_rsp_tmask,
output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
input wire [`CORE_RSP_TAGS-1:0] core_rsp_ready
);
`UNUSED_PARAM (CACHE_ID)
// Multi-bank configuration: several banks may present a response in the same cycle.
if (NUM_BANKS > 1) begin
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [NUM_BANKS-1:0] per_bank_core_rsp_ready_r;
if (CORE_TAG_ID_BITS != 0) begin
// The core response bus handles a single tag at the time
// We first need to select the current tag to process,
// then send all bank responses for that tag as a batch
wire [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
wire core_rsp_ready_unqual;
if (NUM_PORTS > 1) begin
// per_bank_core_rsp_sent_r: ports of each bank forwarded in earlier cycles.
// per_bank_core_rsp_sent:   ports forwarded in the current cycle.
// per_bank_core_rsp_sent_n: union of the two.
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
end
// The per-bank record is cleared once it equals the bank's port mask,
// otherwise it keeps accumulating the forwarded ports.
always @(posedge clk) begin
if (reset) begin
per_bank_core_rsp_sent_r <= '0;
end else begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
per_bank_core_rsp_sent_r[i] <= '0;
end else begin
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
end
end
end
end
// A port is pending when its bank is valid, the port is enabled in the
// port mask and it has not been forwarded yet.
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_valid_p;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
for (genvar p = 0; p < NUM_PORTS; ++p) begin
assign per_bank_core_rsp_valid_p[i][p] = per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& !per_bank_core_rsp_sent_r[i][p];
end
end
// Selects the tag to serve this cycle among the pending ports
// (presumably the first valid entry - confirm against VX_find_first).
VX_find_first #(
.N (NUM_BANKS * NUM_PORTS),
.DATAW (CORE_TAG_WIDTH)
) find_first (
.valid_i (per_bank_core_rsp_valid_p),
.data_i (per_bank_core_rsp_tag),
.data_o (core_rsp_tag_unqual),
`UNUSED_PIN (valid_o)
);
// Forward every pending port whose tag ID bits match the selected tag.
// A port is recorded as sent only when the output buffer is ready.
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
per_bank_core_rsp_sent = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
for (integer p = 0; p < NUM_PORTS; ++p) begin
if (per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& !per_bank_core_rsp_sent_r[i][p]
&& (per_bank_core_rsp_tag[i][p][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
per_bank_core_rsp_sent[i][p] = core_rsp_ready_unqual;
end
end
end
end
// A bank is signalled ready once the forwarded ports equal its port mask.
always @(*) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
end
end
end else begin
`UNUSED_VAR (per_bank_core_rsp_pmask)
// Single port per bank: the tag is selected among the valid banks.
VX_find_first #(
.N (NUM_BANKS),
.DATAW (CORE_TAG_WIDTH)
) find_first (
.valid_i (per_bank_core_rsp_valid),
.data_i (per_bank_core_rsp_tag),
.data_o (core_rsp_tag_unqual),
`UNUSED_PIN (valid_o)
);
// Every valid bank whose tag ID bits match the selected tag is forwarded
// and is acknowledged when the output buffer is ready.
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
per_bank_core_rsp_ready_r = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][0][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual;
end
end
end
end
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
// Single output buffer carrying {thread mask, tag, data};
// PASSTHRU is requested when OUT_REG is 0.
VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
.PASSTHRU (0 == OUT_REG)
) out_sbuf (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_any),
.data_in ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}),
.ready_in (core_rsp_ready_unqual),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tmask, core_rsp_tag, core_rsp_data}),
.ready_out (core_rsp_ready)
);
end else begin
// No tag ID bits: each requester has its own tag, valid and ready signals.
reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
wire [NUM_REQS-1:0] core_rsp_ready_unqual;
if (NUM_PORTS > 1) begin
// bank_select_table[r] = {port, bank} currently feeding requester r.
reg [NUM_REQS-1:0][(`PORTS_BITS + `BANK_SELECT_BITS)-1:0] bank_select_table;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
end
// Same bookkeeping as the tag-ID path: clear the record once it equals the
// bank's port mask, accumulate otherwise.
always @(posedge clk) begin
if (reset) begin
per_bank_core_rsp_sent_r <= '0;
end else begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
per_bank_core_rsp_sent_r[i] <= '0;
end else begin
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
end
end
end
end
// Route each pending port to the requester named by its tid. Banks are
// scanned from the highest index down, so when several banks target the
// same requester the assignment from the lowest-numbered bank is the one
// that remains (later procedural assignments override earlier ones).
always @(*) begin
core_rsp_valid_unqual = '0;
core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
bank_select_table = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
for (integer p = 0; p < NUM_PORTS; ++p) begin
if (per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& !per_bank_core_rsp_sent_r[i][p]) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_tag[i][p];
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
bank_select_table[per_bank_core_rsp_tid[i][p]] = {`PORTS_BITS'(p), `BANK_SELECT_BITS'(i)};
end
end
end
end
// Mark the selected {bank, port} of each valid requester as sent when that
// requester's output buffer is ready.
always @(*) begin
per_bank_core_rsp_sent = '0;
for (integer i = 0; i < NUM_REQS; i++) begin
if (core_rsp_valid_unqual[i]) begin
per_bank_core_rsp_sent[bank_select_table[i][0 +: `BANK_SELECT_BITS]][bank_select_table[i][`BANK_SELECT_BITS +: `PORTS_BITS]] = core_rsp_ready_unqual[i];
end
end
end
// A bank is signalled ready once the forwarded ports equal its port mask.
always @(*) begin
for (integer i = 0; i < NUM_BANKS; i++) begin
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
end
end
end else begin
`UNUSED_VAR (per_bank_core_rsp_pmask)
// bank_select_table[r] is a one-hot mask of the bank feeding requester r.
reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;
// Descending scan: the lowest-numbered valid bank targeting a requester
// is the one whose values remain.
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
bank_select_table = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_rsp_valid[i]) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
end
end
end
// A bank is ready when its requester's buffer is ready and the bank is the
// one selected for that requester.
always @(*) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]]
&& bank_select_table[per_bank_core_rsp_tid[i]][i];
end
end
end
// One output buffer per requester carrying {tag, data}.
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
.PASSTHRU (0 == OUT_REG)
) out_sbuf (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_unqual[i]),
.data_in ({core_rsp_tag_unqual[i], core_rsp_data_unqual[i]}),
.ready_in (core_rsp_ready_unqual[i]),
.valid_out (core_rsp_valid[i]),
.data_out ({core_rsp_tag[i],core_rsp_data[i]}),
.ready_out (core_rsp_ready[i])
);
end
assign core_rsp_tmask = core_rsp_valid;
end
assign per_bank_core_rsp_ready = per_bank_core_rsp_ready_r;
end else begin
// Single bank: purely combinational, clk/reset/pmask are not used.
// NOTE(review): pmask is ignored here, so this path looks like it assumes
// NUM_PORTS == 1 when NUM_BANKS == 1 - confirm against the instantiating cache.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (per_bank_core_rsp_pmask)
if (NUM_REQS > 1) begin
reg [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
if (CORE_TAG_ID_BITS != 0) begin
// Shared tag: the bank's valid/tag pass straight through, the thread mask
// and data are steered to the requester given by tid.
reg [NUM_REQS-1:0] core_rsp_tmask_unqual;
always @(*) begin
core_rsp_tmask_unqual = 0;
core_rsp_tmask_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
core_rsp_tag_unqual = per_bank_core_rsp_tag;
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
end
assign core_rsp_valid = per_bank_core_rsp_valid;
assign core_rsp_tmask = core_rsp_tmask_unqual;
assign per_bank_core_rsp_ready = core_rsp_ready;
end else begin
// Per-requester tags: valid, tag and data are all steered by tid and the
// bank's ready comes from the addressed requester.
reg [`CORE_RSP_TAGS-1:0] core_rsp_valid_unqual;
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
core_rsp_tag_unqual = 'x;
core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag;
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
end
assign core_rsp_valid = core_rsp_valid_unqual;
assign core_rsp_tmask = core_rsp_valid_unqual;
assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid];
end
assign core_rsp_tag = core_rsp_tag_unqual;
assign core_rsp_data = core_rsp_data_unqual;
end else begin
// Single bank, single requester: direct pass-through.
`UNUSED_VAR(per_bank_core_rsp_tid)
assign core_rsp_valid = per_bank_core_rsp_valid;
assign core_rsp_tmask = per_bank_core_rsp_valid;
assign core_rsp_tag = per_bank_core_rsp_tag;
assign core_rsp_data = per_bank_core_rsp_data;
assign per_bank_core_rsp_ready = core_rsp_ready;
end
end
endmodule

View File

@@ -1,239 +0,0 @@
`include "VX_cache_define.vh"
// VX_core_rsp_merge: gathers the core responses produced by the cache banks
// (per_bank_core_rsp_*) and drives the core response bus (core_rsp_*).
// In this version each bank carries a single tag shared by all of its ports.
module VX_core_rsp_merge #(
parameter CACHE_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) (
input wire clk,
input wire reset,
// Per Bank WB
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
// Core Response
output wire [`CORE_RSP_TAGS-1:0] core_rsp_valid,
output wire [NUM_REQS-1:0] core_rsp_tmask,
output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
input wire [`CORE_RSP_TAGS-1:0] core_rsp_ready
);
`UNUSED_PARAM (CACHE_ID)
// Multi-bank configuration: several banks may present a response in the same cycle.
if (NUM_BANKS > 1) begin
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
// core_rsp_bank_select[i] drives per_bank_core_rsp_ready[i].
reg [NUM_BANKS-1:0] core_rsp_bank_select;
if (CORE_TAG_ID_BITS != 0) begin
// The core response bus handles a single tag at the time
// We first need to select the current tag to process,
// then send all bank responses for that tag as a batch
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
wire core_rsp_ready_unqual;
// Descending scan with override: the tag of the lowest-numbered valid bank
// is the one that remains.
always @(*) begin
core_rsp_tag_unqual = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_rsp_valid[i]) begin
core_rsp_tag_unqual = per_bank_core_rsp_tag[i];
end
end
end
if (NUM_PORTS > 1) begin
// Forward every enabled port of each valid bank whose tag ID bits match
// the selected tag; the bank is acknowledged when the output buffer is ready.
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
core_rsp_bank_select = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer p = 0; p < NUM_PORTS; p++) begin
if (per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
core_rsp_bank_select[i] = core_rsp_ready_unqual;
end
end
end
end
end else begin
`UNUSED_VAR (per_bank_core_rsp_pmask)
// Single port per bank: same matching rule without the port mask.
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
core_rsp_bank_select = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_bank_select[i] = core_rsp_ready_unqual;
end
end
end
end
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
// Single output buffer carrying {thread mask, tag, data}.
VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
) pipe_reg (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_any),
.data_in ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}),
.ready_in (core_rsp_ready_unqual),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tmask, core_rsp_tag, core_rsp_data}),
.ready_out (core_rsp_ready)
);
end else begin
// No tag ID bits: each requester has its own tag, valid and ready signals.
`UNUSED_VAR (per_bank_core_rsp_pmask)
reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
// bank_select_table[r] is a one-hot mask of the bank feeding requester r.
reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;
wire [NUM_REQS-1:0] core_rsp_ready_unqual;
// Descending scan: the lowest-numbered valid bank targeting a requester
// is the one whose values remain.
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
bank_select_table = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_rsp_valid[i]) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
end
end
end
// A bank is acknowledged when its requester's buffer is ready and the bank
// is the one selected for that requester.
always @(*) begin
for (integer i = 0; i < NUM_BANKS; i++) begin
core_rsp_bank_select[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]]
&& bank_select_table[per_bank_core_rsp_tid[i]][i];
end
end
// One output buffer per requester carrying {tag, data}.
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
) pipe_reg (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_unqual[i]),
.data_in ({core_rsp_tag_unqual[i], core_rsp_data_unqual[i]}),
.ready_in (core_rsp_ready_unqual[i]),
.valid_out (core_rsp_valid[i]),
.data_out ({core_rsp_tag[i],core_rsp_data[i]}),
.ready_out (core_rsp_ready[i])
);
end
assign core_rsp_tmask = core_rsp_valid;
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i];
end
end else begin
// Single bank: purely combinational, clk/reset/pmask are not used.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (per_bank_core_rsp_pmask)
if (NUM_REQS > 1) begin
reg [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
if (CORE_TAG_ID_BITS != 0) begin
// Shared tag: the bank's valid/tag pass straight through, the thread mask
// and data are steered to the requester given by tid.
reg [NUM_REQS-1:0] core_rsp_tmask_unqual;
always @(*) begin
core_rsp_tmask_unqual = 0;
core_rsp_tmask_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
core_rsp_tag_unqual = per_bank_core_rsp_tag;
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
end
assign core_rsp_valid = per_bank_core_rsp_valid;
assign core_rsp_tmask = core_rsp_tmask_unqual;
assign per_bank_core_rsp_ready = core_rsp_ready;
end else begin
// Per-requester tags: valid, tag and data are all steered by tid and the
// bank's ready comes from the addressed requester.
reg [`CORE_RSP_TAGS-1:0] core_rsp_valid_unqual;
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
core_rsp_tag_unqual = 'x;
core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag;
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
end
assign core_rsp_valid = core_rsp_valid_unqual;
assign core_rsp_tmask = core_rsp_valid_unqual;
assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid];
end
assign core_rsp_tag = core_rsp_tag_unqual;
assign core_rsp_data = core_rsp_data_unqual;
end else begin
// Single bank, single requester: direct pass-through.
`UNUSED_VAR(per_bank_core_rsp_tid)
assign core_rsp_valid = per_bank_core_rsp_valid;
assign core_rsp_tmask = per_bank_core_rsp_valid;
assign core_rsp_tag = per_bank_core_rsp_tag;
assign core_rsp_data = per_bank_core_rsp_data;
assign per_bank_core_rsp_ready = core_rsp_ready;
end
end
endmodule

136
hw/rtl/cache/VX_data_access.sv vendored Normal file
View File

@@ -0,0 +1,136 @@
`include "VX_cache_define.vh"
// VX_data_access: data store of one cache bank. Wraps a single-port RAM that
// holds one cache line per entry, builds the line-wide write data / write
// enables from per-port word writes or from a line fill, and extracts the
// word selected by each port from the line that is read.
module VX_data_access #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
input wire clk,
input wire reset,
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_UNUSED_BEGIN
input wire[31:0] debug_pc,
input wire[`NW_BITS-1:0] debug_wid,
`IGNORE_UNUSED_END
`endif
// stall and read are only consumed by the debug trace at the end of the module
input wire stall,
input wire read,
// fill: write the whole line from fill_data (when write is low)
input wire fill,
// write: write the words selected by wsel/pmask/byteen from write_data
input wire write,
input wire[`LINE_ADDR_WIDTH-1:0] addr,
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel,
input wire [NUM_PORTS-1:0] pmask,
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen,
input wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] fill_data,
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] write_data,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] read_data
);
`UNUSED_PARAM (CACHE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (reset)
`UNUSED_VAR (addr)
`UNUSED_VAR (read)
// One write-enable bit per line byte when the cache is writeable,
// a single bit otherwise (fills always write the whole line).
localparam BYTEENW = WRITE_ENABLE ? CACHE_LINE_SIZE : 1;
wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] rdata;
wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] wdata;
wire [BYTEENW-1:0] wren;
// The RAM is indexed with the low LINE_SELECT_BITS bits of the line address.
wire [`LINE_SELECT_BITS-1:0] line_addr = addr[`LINE_SELECT_BITS-1:0];
if (WRITE_ENABLE) begin
if (`WORDS_PER_LINE > 1) begin
reg [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] wdata_r;
reg [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
if (NUM_PORTS > 1) begin
// Each enabled port drops its word and byte enables into the line slot
// named by its wsel; words no port targets keep zero enables. If two
// enabled ports share a wsel, the higher-numbered port's values remain.
always @(*) begin
wdata_r = 'x;
wren_r = 0;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if (pmask[i]) begin
wdata_r[wsel[i]] = write_data[i];
wren_r[wsel[i]] = byteen[i];
end
end
end
end else begin
`UNUSED_VAR (pmask)
// Single port: the word is replicated across the line and only the
// selected word's byte enables are set.
always @(*) begin
wdata_r = {`WORDS_PER_LINE{write_data}};
wren_r = 0;
wren_r[wsel] = byteen;
end
end
// write takes precedence over fill; a fill enables every byte of the line.
assign wdata = write ? wdata_r : fill_data;
assign wren = write ? wren_r : {BYTEENW{fill}};
end else begin
// One word per line: no word selection is needed.
`UNUSED_VAR (wsel)
`UNUSED_VAR (pmask)
assign wdata = write ? write_data : fill_data;
assign wren = write ? byteen : {BYTEENW{fill}};
end
end else begin
// Read-only cache: only line fills write the store.
`UNUSED_VAR (write)
`UNUSED_VAR (byteen)
`UNUSED_VAR (pmask)
`UNUSED_VAR (write_data)
assign wdata = fill_data;
assign wren = fill;
end
VX_sp_ram #(
.DATAW (`CACHE_LINE_WIDTH),
.SIZE (`LINES_PER_BANK),
.BYTEENW (BYTEENW),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.addr (line_addr),
.wren (wren),
.wdata (wdata),
.rdata (rdata)
);
// Each port reads the word selected by its wsel out of the line read.
if (`WORDS_PER_LINE > 1) begin
for (genvar i = 0; i < NUM_PORTS; ++i) begin
assign read_data[i] = rdata[wsel[i]];
end
end else begin
assign read_data = rdata;
end
`UNUSED_VAR (stall)
// Optional trace of fills, reads and writes that are not stalled.
`ifdef DBG_PRINT_CACHE_DATA
always @(posedge clk) begin
if (fill && ~stall) begin
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data);
end
if (read && ~stall) begin
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, read_data);
end
if (write && ~stall) begin
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byteen, line_addr, write_data);
end
end
`endif
endmodule

View File

@@ -1,93 +0,0 @@
`include "VX_cache_define.vh"
// VX_data_access: data store of one cache bank. Wraps a single-port RAM that
// holds one cache line per entry; the caller supplies line-wide write data
// and byte enables and receives the whole line on rdata.
module VX_data_access #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1
) (
input wire clk,
input wire reset,
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc,
input wire[`NW_BITS-1:0] debug_wid,
`IGNORE_WARNINGS_END
`endif
`IGNORE_WARNINGS_BEGIN
input wire[`LINE_ADDR_WIDTH-1:0] addr,
`IGNORE_WARNINGS_END
// reading
input wire readen,
output wire [`CACHE_LINE_WIDTH-1:0] rdata,
// writing
input wire writeen,
input wire is_fill,
input wire [CACHE_LINE_SIZE-1:0] byteen,
input wire [`CACHE_LINE_WIDTH-1:0] wdata
);
`UNUSED_PARAM (CACHE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (reset)
// readen is only consumed by the debug trace; the RAM read enable is tied high.
`UNUSED_VAR (readen)
// One byte-enable bit per line byte when the cache is writeable, one bit otherwise.
localparam BYTEENW = WRITE_ENABLE ? CACHE_LINE_SIZE : 1;
wire [`LINE_SELECT_BITS-1:0] line_addr;
wire [BYTEENW-1:0] byte_enable;
// The RAM is indexed with the low LINE_SELECT_BITS bits of the line address.
assign line_addr = addr[`LINE_SELECT_BITS-1:0];
if (WRITE_ENABLE) begin
// A fill enables every byte of the line; a core write uses the caller's mask.
assign byte_enable = is_fill ? {BYTEENW{1'b1}} : byteen;
end else begin
`UNUSED_VAR (byteen)
`UNUSED_VAR (is_fill)
assign byte_enable = 1'b1;
end
VX_sp_ram #(
.DATAW (CACHE_LINE_SIZE * 8),
.SIZE (`LINES_PER_BANK),
.BYTEENW (BYTEENW),
.RWCHECK (1)
) data_store (
.clk(clk),
.addr(line_addr),
.wren(writeen),
.byteen(byte_enable),
.rden(1'b1),
.din(wdata),
.dout(rdata)
);
// Optional trace of fills, writes and reads.
`ifdef DBG_PRINT_CACHE_DATA
always @(posedge clk) begin
if (writeen) begin
if (is_fill) begin
$display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, wdata);
end else begin
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wdata);
end
end
if (readen) begin
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, rdata);
end
end
`endif
endmodule

240
hw/rtl/cache/VX_miss_resrv.sv vendored Normal file
View File

@@ -0,0 +1,240 @@
`include "VX_cache_define.vh"
// VX_miss_resrv: miss reservation table of one cache bank.
//
// Each of the MSHR_SIZE entries has an address (addr_table), a valid bit
// (valid_table) and a ready bit (ready_table); the request payload is kept
// in a separate dual-port RAM indexed by the entry id.
//
//  - allocate: valid/ready handshake. On an accepted allocate the entry
//    allocate_id is marked valid and not ready, and its address and payload
//    are recorded.
//  - fill:     makes entry fill_id the dequeue candidate on the next cycle;
//    fill_addr returns the address stored for fill_id.
//  - lookup:   lookup_match is high when a valid entry other than lookup_id
//    holds lookup_addr. lookup_replay marks every entry holding lookup_addr
//    as ready.
//  - dequeue:  valid/ready handshake presenting the current candidate. An
//    accepted dequeue invalidates the entry and loads the next candidate
//    from the valid-and-ready entries.
//  - release:  invalidates entry release_id.
module VX_miss_resrv #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE)
) (
input wire clk,
input wire reset,
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_UNUSED_BEGIN
input wire[31:0] deq_debug_pc,
input wire[`NW_BITS-1:0] deq_debug_wid,
input wire[31:0] lkp_debug_pc,
input wire[`NW_BITS-1:0] lkp_debug_wid,
input wire[31:0] rel_debug_pc,
input wire[`NW_BITS-1:0] rel_debug_wid,
`IGNORE_UNUSED_END
`endif
// allocate
input wire allocate_valid,
input wire [`LINE_ADDR_WIDTH-1:0] allocate_addr,
input wire [`MSHR_DATA_WIDTH-1:0] allocate_data,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
output wire allocate_ready,
// fill
input wire fill_valid,
input wire [MSHR_ADDR_WIDTH-1:0] fill_id,
output wire [`LINE_ADDR_WIDTH-1:0] fill_addr,
// lookup
input wire lookup_valid,
input wire lookup_replay,
input wire [MSHR_ADDR_WIDTH-1:0] lookup_id,
input wire [`LINE_ADDR_WIDTH-1:0] lookup_addr,
output wire lookup_match,
// dequeue
output wire dequeue_valid,
output wire [MSHR_ADDR_WIDTH-1:0] dequeue_id,
output wire [`LINE_ADDR_WIDTH-1:0] dequeue_addr,
output wire [`MSHR_DATA_WIDTH-1:0] dequeue_data,
input wire dequeue_ready,
// release
input wire release_valid,
input wire [MSHR_ADDR_WIDTH-1:0] release_id
);
`UNUSED_PARAM (CACHE_ID)
`UNUSED_PARAM (BANK_ID)
// Per-entry state; the *_n signals are the values registered on the next clock edge.
reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table, addr_table_n;
reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
reg [MSHR_SIZE-1:0] ready_table, ready_table_n;
// Registered allocation slot and dequeue candidate.
reg allocate_rdy_r, allocate_rdy_n;
reg [MSHR_ADDR_WIDTH-1:0] allocate_id_r, allocate_id_n;
reg dequeue_val_r, dequeue_val_n, dequeue_val_x;
reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n, dequeue_id_x;
// Intermediate tables after applying this cycle's dequeue and replay.
reg [MSHR_SIZE-1:0] valid_table_x;
reg [MSHR_SIZE-1:0] ready_table_x;
wire [MSHR_SIZE-1:0] addr_matches;
// A transfer happens only when both sides of the handshake agree.
wire allocate_fire = allocate_valid && allocate_ready;
wire dequeue_fire = dequeue_valid && dequeue_ready;
// Address comparison against every entry; validity is applied by the consumers.
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
assign addr_matches[i] = (addr_table[i] == lookup_addr);
end
// Stage 1: an accepted dequeue invalidates the dequeued entry and a replay
// marks every entry holding lookup_addr as ready.
always @(*) begin
valid_table_x = valid_table;
ready_table_x = ready_table;
if (dequeue_fire) begin
valid_table_x[dequeue_id] = 0;
end
if (lookup_replay) begin
ready_table_x |= addr_matches;
end
end
// Next dequeue candidate among the valid and ready entries.
// NOTE(review): VX_lzc is assumed to return the index of a set bit of in_i
// and to drive valid_o low when in_i is zero - confirm against VX_lzc.
VX_lzc #(
.N (MSHR_SIZE)
) dequeue_sel (
.in_i (valid_table_x & ready_table_x),
.cnt_o (dequeue_id_x),
.valid_o (dequeue_val_x)
);
// Next allocation slot among the entries that will be invalid next cycle.
VX_lzc #(
.N (MSHR_SIZE)
) allocate_sel (
.in_i (~valid_table_n),
.cnt_o (allocate_id_n),
.valid_o (allocate_rdy_n)
);
// Stage 2: apply allocate, fill and release on top of stage 1.
always @(*) begin
valid_table_n = valid_table_x;
ready_table_n = ready_table_x;
addr_table_n = addr_table;
dequeue_val_n = dequeue_val_r;
dequeue_id_n = dequeue_id_r;
if (dequeue_fire) begin
dequeue_val_n = dequeue_val_x;
dequeue_id_n = dequeue_id_x;
end
if (allocate_fire) begin
valid_table_n[allocate_id] = 1;
ready_table_n[allocate_id] = 0;
addr_table_n[allocate_id] = allocate_addr;
end
// A fill overrides the dequeue candidate selected above.
if (fill_valid) begin
dequeue_val_n = 1;
dequeue_id_n = fill_id;
end
if (release_valid) begin
valid_table_n[release_id] = 0;
end
end
// Only the valid bits and the two handshake flags are reset.
always @(posedge clk) begin
if (reset) begin
valid_table <= 0;
allocate_rdy_r <= 0;
dequeue_val_r <= 0;
end else begin
valid_table <= valid_table_n;
allocate_rdy_r <= allocate_rdy_n;
dequeue_val_r <= dequeue_val_n;
end
ready_table <= ready_table_n;
addr_table <= addr_table_n;
dequeue_id_r <= dequeue_id_n;
allocate_id_r <= allocate_id_n;
`ASSERT(!allocate_fire || !valid_table[allocate_id_r], ("runtime error"));
`ASSERT(!release_valid || valid_table[release_id], ("runtime error"));
end
`RUNTIME_ASSERT((!allocate_fire || ~valid_table[allocate_id]), ("%t: *** cache%0d:%0d in-use allocation: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id))
`RUNTIME_ASSERT((!fill_valid || valid_table[fill_id]), ("%t: *** cache%0d:%0d invalid fill: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id))
// Request payload storage. The write is gated with allocate_fire (not just
// allocate_valid) so that a request presented while allocate_ready is low
// cannot overwrite the payload of a live entry: in that case allocate_id_r
// does not designate a reserved free slot. This matches the address/valid
// tables, which are only updated on allocate_fire.
VX_dp_ram #(
.DATAW (`MSHR_DATA_WIDTH),
.SIZE (MSHR_SIZE),
.LUTRAM (1)
) entries (
.clk (clk),
.waddr (allocate_id_r),
.raddr (dequeue_id_r),
.wren (allocate_fire),
.wdata (allocate_data),
.rdata (dequeue_data)
);
assign fill_addr = addr_table[fill_id];
assign allocate_ready = allocate_rdy_r;
assign allocate_id = allocate_id_r;
assign dequeue_valid = dequeue_val_r;
assign dequeue_id = dequeue_id_r;
assign dequeue_addr = addr_table[dequeue_id_r];
// lookup_match ignores the entry lookup_id itself and only considers valid entries.
wire [MSHR_SIZE-1:0] lookup_entries;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
assign lookup_entries[i] = (i != lookup_id);
end
assign lookup_match = |(lookup_entries & valid_table & addr_matches);
// lookup_valid is only consumed by the debug trace below.
`UNUSED_VAR (lookup_valid)
`ifdef DBG_PRINT_CACHE_MSHR
always @(posedge clk) begin
if (allocate_fire || fill_valid || dequeue_fire || lookup_replay || lookup_valid || release_valid) begin
if (allocate_fire)
dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_debug_wid, deq_debug_pc);
if (fill_valid)
dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d, addr=%0h\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id, `LINE_TO_BYTE_ADDR(fill_addr, BANK_ID));
if (dequeue_fire)
dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_debug_wid, deq_debug_pc);
if (lookup_replay)
dpi_trace("%d: cache%0d:%0d mshr-replay: addr=%0h, id=%0d\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id);
if (lookup_valid)
dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lookup_match, lkp_debug_wid, lkp_debug_pc);
if (release_valid)
dpi_trace("%d: cache%0d:%0d mshr-release id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
release_id, rel_debug_wid, rel_debug_pc);
// Dump the valid entries; entries marked ready are prefixed with '*'.
dpi_trace("%d: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID);
for (integer i = 0; i < MSHR_SIZE; ++i) begin
if (valid_table[i]) begin
dpi_trace(" ");
if (ready_table[i])
dpi_trace("*");
dpi_trace("%0d=%0h", i, `LINE_TO_BYTE_ADDR(addr_table[i], BANK_ID));
end
end
dpi_trace("\n");
end
end
`endif
endmodule

View File

@@ -1,233 +0,0 @@
`include "VX_cache_define.vh"
module VX_miss_resrv #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 1,
parameter ALM_FULL = (MSHR_SIZE-1),
// core request tag size
parameter CORE_TAG_WIDTH = 1
) (
input wire clk,
input wire reset,
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
input wire[31:0] deq_debug_pc,
input wire[`NW_BITS-1:0] deq_debug_wid,
input wire[31:0] enq_debug_pc,
input wire[`NW_BITS-1:0] enq_debug_wid,
`IGNORE_WARNINGS_END
`endif
// enqueue
input wire enqueue,
input wire [`LINE_ADDR_WIDTH-1:0] enqueue_addr,
input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data,
input wire enqueue_is_mshr,
input wire enqueue_as_ready,
output wire enqueue_full,
output wire enqueue_almfull,
// fill
input wire fill_start,
input wire [`LINE_ADDR_WIDTH-1:0] fill_addr,
// lookup
input wire [`LINE_ADDR_WIDTH-1:0] lookup_addr,
output wire lookup_match,
input wire lookup_fill,
// schedule
input wire schedule,
output wire schedule_valid,
output wire [`LINE_ADDR_WIDTH-1:0] schedule_addr,
output wire [`MSHR_DATA_WIDTH-1:0] schedule_data,
// dequeue
input wire dequeue
);
`UNUSED_PARAM (CACHE_ID)
`UNUSED_PARAM (BANK_ID)
localparam ADDRW = $clog2(MSHR_SIZE);
reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
reg [MSHR_SIZE-1:0] ready_table, ready_table_n;
reg [ADDRW-1:0] head_ptr, head_ptr_n;
reg [ADDRW-1:0] tail_ptr, tail_ptr_n;
reg [ADDRW-1:0] restore_ptr, restore_ptr_n;
reg [ADDRW-1:0] schedule_ptr, schedule_ptr_n;
reg [ADDRW-1:0] used_r;
reg alm_full_r, full_r;
reg valid_out_r;
wire [MSHR_SIZE-1:0] valid_address_match;
for (genvar i = 0; i < MSHR_SIZE; i++) begin
assign valid_address_match[i] = valid_table[i] && (addr_table[i] == lookup_addr);
end
wire push_new = enqueue && !enqueue_is_mshr;
wire restore = enqueue && enqueue_is_mshr;
// Combinational next-state logic for the valid/ready tables and the four
// queue pointers (head, tail, schedule, restore).
// Statement order matters: later assignments override earlier ones when
// they target the same table bit or pointer in the same cycle.
always @(*) begin
// default: hold the current state
valid_table_n = valid_table;
ready_table_n = ready_table;
head_ptr_n = head_ptr;
tail_ptr_n = tail_ptr;
schedule_ptr_n = schedule_ptr;
restore_ptr_n = restore_ptr;
if (lookup_fill) begin
// unlock pending requests for scheduling
// (every valid entry whose address matches lookup_addr becomes ready)
ready_table_n |= valid_address_match;
end
if (schedule) begin
// schedule next entry
// the entry being issued is cleared from both tables; it is written
// back through the restore path below if it has to be replayed
schedule_ptr_n = schedule_ptr + 1;
valid_table_n[schedule_ptr] = 0;
ready_table_n[schedule_ptr] = 0;
end
// A fill starting for the address at the schedule pointer marks that
// entry ready if it is valid. This reads the registered valid_table, not
// valid_table_n, and overrides the clear done by 'schedule' above; the
// sequential block asserts that schedule and fill_start do not coincide.
if (fill_start && (fill_addr == addr_table[schedule_ptr])) begin
ready_table_n[schedule_ptr] = valid_table[schedule_ptr];
end
// push_new, restore and dequeue are prioritized in that order; at most one
// of the three branches takes effect per cycle.
// NOTE(review): a dequeue that coincides with push_new does not advance
// head_ptr here, yet the used_r update in the sequential block accounts for
// both events in the same cycle - confirm the caller never asserts both.
if (push_new) begin
// push new entry
valid_table_n[tail_ptr] = 1;
ready_table_n[tail_ptr] = enqueue_as_ready;
tail_ptr_n = tail_ptr + 1;
end else if (restore) begin
// restore schedule, returning missed mshr entry
// the schedule pointer rewinds to the head of the queue
valid_table_n[restore_ptr] = 1;
ready_table_n[restore_ptr] = enqueue_as_ready;
restore_ptr_n = restore_ptr + 1;
schedule_ptr_n = head_ptr;
end else if (dequeue) begin
// clear scheduled entry
// the restore pointer is re-synchronized to the new head
head_ptr_n = head_ptr + 1;
restore_ptr_n = head_ptr_n;
end
end
// Sequential state update: registers the next-state values computed by the
// combinational block, maintains the occupancy counter and the full /
// almost-full flags, and checks protocol assumptions with assertions.
always @(posedge clk) begin
if (reset) begin
// synchronous reset: empty tables, all pointers and flags cleared
valid_table <= 0;
ready_table <= 0;
head_ptr <= 0;
tail_ptr <= 0;
schedule_ptr <= 0;
restore_ptr <= 0;
used_r <= 0;
alm_full_r <= 0;
full_r <= 0;
valid_out_r <= 0;
end else begin
// protocol checks: scheduling requires a valid output and must not
// coincide with a fill start or a restore
if (schedule) begin
assert(schedule_valid);
assert(!fill_start);
assert(!restore);
end
// protocol checks: never push into a full queue
if (push_new) begin
assert(!full_r);
end else if (restore) begin
assert(!schedule);
end
// Flag maintenance. used_r is compared before this cycle's update, so
// the thresholds are one below the level being entered on a push and
// equal to the level being left on a dequeue.
if (push_new) begin
if (!dequeue) begin
if (used_r == ADDRW'(ALM_FULL-1))
alm_full_r <= 1;
if (used_r == ADDRW'(MSHR_SIZE-1))
full_r <= 1;
end
end else if (dequeue) begin
if (used_r == ADDRW'(ALM_FULL))
alm_full_r <= 0;
full_r <= 0;
end
// occupancy counter: net change is +1 (push only), -1 (dequeue only)
// or 0 (both or neither); restore does not change the count
used_r <= used_r + ADDRW'($signed(2'(push_new) - 2'(dequeue)));
valid_table <= valid_table_n;
ready_table <= ready_table_n;
head_ptr <= head_ptr_n;
tail_ptr <= tail_ptr_n;
schedule_ptr <= schedule_ptr_n;
restore_ptr <= restore_ptr_n;
// schedule_valid for the next cycle: ready bit of the entry the schedule
// pointer will point at after this edge
valid_out_r <= ready_table_n[schedule_ptr_n];
end
// The address table has no reset and is written outside the reset branch;
// only new pushes store an address (restore reuses the stored one).
if (push_new) begin
addr_table[tail_ptr] <= enqueue_addr;
end
end
// Payload storage for the queued requests, one MSHR_DATA_WIDTH-wide word
// per entry. A new push writes enqueue_data at tail_ptr; the read port is
// always enabled and addressed by schedule_ptr, producing schedule_data.
// A restore does not write here (wren is push_new only).
VX_dp_ram #(
.DATAW (`MSHR_DATA_WIDTH),
.SIZE (MSHR_SIZE),
.RWCHECK (1),
.FASTRAM (1)
) entries (
.clk (clk),
.waddr (tail_ptr),
.raddr (schedule_ptr),
.wren (push_new),
.byteen (1'b1),
.rden (1'b1),
.din (enqueue_data),
.dout (schedule_data)
);
assign lookup_match = (| valid_address_match);
assign schedule_valid = valid_out_r;
assign schedule_addr = addr_table[schedule_ptr];
assign enqueue_almfull = alm_full_r;
assign enqueue_full = full_r;
`ifdef DBG_PRINT_CACHE_MSHR
// Simulation-only trace: on any cycle with a lookup_fill, schedule, enqueue
// or dequeue event, print one line per event followed by a dump of the
// valid entries of the table.
always @(posedge clk) begin
if (lookup_fill || schedule || enqueue || dequeue) begin
if (schedule)
$display("%t: cache%0d:%0d mshr-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(schedule_addr, BANK_ID), deq_debug_wid, deq_debug_pc);
if (enqueue) begin
// enqueue_is_mshr distinguishes a restore of an existing entry from a new push
if (enqueue_is_mshr)
$display("%t: cache%0d:%0d mshr-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready);
else
$display("%t: cache%0d:%0d mshr-enqueue: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready, enq_debug_wid, enq_debug_pc);
end
// NOTE(review): the dequeue trace prints enq_debug_wid / enq_debug_pc rather
// than the deq_debug_* signals - confirm this is intended.
if (dequeue)
$display("%t: cache%0d:%0d mshr-dequeue addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, enq_debug_wid, enq_debug_pc);
// table dump legend: '*' marks the entry at schedule_ptr, '!' marks an
// entry that is valid but not ready
$write("%t: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID);
for (integer j = 0; j < MSHR_SIZE; j++) begin
if (valid_table[j]) begin
$write(" ");
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
if (~ready_table[j]) $write("!");
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
end
end
$write("\n");
end
end
`endif
endmodule

View File

@@ -1,6 +1,7 @@
`include "VX_cache_define.vh"
module VX_nc_bypass #(
parameter NUM_PORTS = 1,
parameter NUM_REQS = 1,
parameter NUM_RSP_TAGS = 0,
parameter NC_TAG_BIT = 0,
@@ -10,13 +11,14 @@ module VX_nc_bypass #(
parameter CORE_TAG_IN_WIDTH = 1,
parameter MEM_ADDR_WIDTH = 1,
parameter MEM_DATA_SIZE = 1,
parameter MEM_DATA_SIZE = 1,
parameter MEM_TAG_IN_WIDTH = 1,
parameter MEM_TAG_OUT_WIDTH = 1,
localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
parameter CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
parameter MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
) (
input wire clk,
input wire reset,
@@ -57,8 +59,10 @@ module VX_nc_bypass #(
input wire mem_req_valid_in,
input wire mem_req_rw_in,
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
input wire [MEM_DATA_SIZE-1:0] mem_req_byteen_in,
input wire [MEM_DATA_WIDTH-1:0] mem_req_data_in,
input wire [NUM_PORTS-1:0] mem_req_pmask_in,
input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in,
input wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in,
input wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in,
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
output wire mem_req_ready_in,
@@ -66,8 +70,10 @@ module VX_nc_bypass #(
output wire mem_req_valid_out,
output wire mem_req_rw_out,
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
output wire [MEM_DATA_SIZE-1:0] mem_req_byteen_out,
output wire [MEM_DATA_WIDTH-1:0] mem_req_data_out,
output wire [NUM_PORTS-1:0] mem_req_pmask_out,
output wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_out,
output wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_out,
output wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_out,
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
input wire mem_req_ready_out,
@@ -99,9 +105,9 @@ module VX_nc_bypass #(
// core request handling
wire [NUM_REQS-1:0] core_req_valid_in_nc;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire [NUM_REQS-1:0] core_req_nc_tids;
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire core_req_nc_valid;
for (genvar i = 0; i < NUM_REQS; ++i) begin
@@ -142,7 +148,6 @@ module VX_nc_bypass #(
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
end
end else begin
`UNUSED_VAR (core_req_nc_sel)
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
end
@@ -151,7 +156,7 @@ module VX_nc_bypass #(
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
assign mem_req_ready_in = mem_req_ready_out;
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_nc;
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_c;
VX_bits_insert #(
.N (MEM_TAG_IN_WIDTH),
@@ -160,81 +165,69 @@ module VX_nc_bypass #(
) mem_req_tag_insert (
.data_in (mem_req_tag_in),
.sel_in ('0),
.data_out (mem_req_tag_in_nc)
.data_out (mem_req_tag_in_c)
);
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
wire core_req_rw_in_sel;
if (NUM_REQS > 1) begin
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
wire core_req_rw_in_sel;
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
end
VX_onehot_mux #(
.DATAW (MUX_DATAW),
.N (NUM_REQS)
) core_req_nc_mux (
.data_in (core_req_nc_mux_in),
.sel_in (core_req_nc_sel),
.data_out ({core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel})
);
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
for (genvar i = 0; i < P; ++i) begin
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in_sel;
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in_sel;
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
end else begin
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
end
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_tid];
end else begin
`UNUSED_VAR (core_req_nc_tid)
assign core_req_tag_in_sel = core_req_tag_in;
assign core_req_data_in_sel = core_req_data_in;
assign core_req_byteen_in_sel = core_req_byteen_in;
assign core_req_addr_in_sel = core_req_addr_in;
assign core_req_rw_in_sel = core_req_rw_in;
end
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
if (D != 0) begin
reg [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
reg [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in_r;
reg [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[0] = core_req_byteen_in_sel;
for (genvar i = 0; i < P; ++i) begin
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in;
mem_req_wsel_in_r = 'x;
mem_req_wsel_in_r[0] = req_addr_idx;
mem_req_data_in_r = 'x;
mem_req_data_in_r[0] = core_req_data_in_sel;
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({req_addr_idx, core_req_tag_in});
end else begin
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'(core_req_tag_in);
end
assign mem_req_pmask_out = mem_req_valid_in ? mem_req_pmask_in : NUM_PORTS'(1'b1);
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_wsel_out = mem_req_valid_in ? mem_req_wsel_in : mem_req_wsel_in_r;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
end else begin
`UNUSED_VAR (mem_req_wsel_in)
`UNUSED_VAR (mem_req_pmask_in)
assign mem_req_pmask_out = 0;
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
assign mem_req_wsel_out = 0;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
end
// core response handling
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_unqual;
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_c;
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
@@ -246,7 +239,7 @@ module VX_nc_bypass #(
) core_rsp_tag_insert (
.data_in (core_rsp_tag_in[i]),
.sel_in ('0),
.data_out (core_rsp_tag_out_unqual[i])
.data_out (core_rsp_tag_out_c[i])
);
end
@@ -272,14 +265,14 @@ module VX_nc_bypass #(
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
end
end
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_unqual[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_c[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
end
end else begin
assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc;
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_unqual : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_c : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
assign core_rsp_ready_in = core_rsp_ready_out;
if (NUM_REQS > 1) begin

View File

@@ -24,14 +24,14 @@ module VX_shared_mem #(
parameter CORE_TAG_WIDTH = (2 + CORE_TAG_ID_BITS),
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = `CLOG2(256)
parameter BANK_ADDR_OFFSET = `CLOG2(256)
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
VX_perf_cache_if perf_cache_if,
VX_perf_cache_if.master perf_cache_if,
`endif
// Core request
@@ -64,7 +64,7 @@ module VX_shared_mem #(
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_unqual;
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_unqual;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_unqual;
wire per_bank_core_req_ready_unqual;
wire [NUM_BANKS-1:0] per_bank_core_req_ready_unqual;
VX_core_req_bank_sel #(
.CACHE_ID (CACHE_ID),
@@ -74,8 +74,7 @@ module VX_shared_mem #(
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.BANK_ADDR_OFFSET(BANK_ADDR_OFFSET),
.SHARED_BANK_READY(1)
.BANK_ADDR_OFFSET(BANK_ADDR_OFFSET)
) core_req_bank_sel (
.clk (clk),
.reset (reset),
@@ -103,41 +102,34 @@ module VX_shared_mem #(
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire creq_in_ready;
wire creq_out_valid;
wire crsq_in_fire_last;
wire [NUM_BANKS-1:0] per_bank_req_reads = per_bank_core_req_valid & ~per_bank_core_req_rw;
wire per_bank_req_has_reads = (| per_bank_req_reads);
wire creq_in_valid = (| core_req_valid);
wire creq_out_ready = ~per_bank_req_has_reads // is write only
|| crsq_in_fire_last; // is sending last read response
assign per_bank_core_req_ready_unqual = creq_in_ready;
wire creq_out_valid, creq_out_ready;
wire creq_in_valid, creq_in_ready;
wire creq_in_fire = creq_in_valid && creq_in_ready;
`UNUSED_VAR (creq_in_fire)
wire creq_out_fire = creq_out_valid && creq_out_ready;
`UNUSED_VAR (creq_out_fire)
wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
`UNUSED_VAR (per_bank_core_req_addr_unqual)
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_req_addr_qual[i] = per_bank_core_req_addr_unqual[i][`LINE_SELECT_BITS-1:0];
end
assign creq_in_valid = (| core_req_valid);
assign per_bank_core_req_ready_unqual = {NUM_BANKS{creq_in_ready}};
wire [NUM_BANKS-1:0] core_req_read_mask, core_req_read_mask_unqual;
wire core_req_writeonly, core_req_writeonly_unqual;
assign core_req_read_mask_unqual = per_bank_core_req_valid_unqual & ~per_bank_core_req_rw_unqual;
assign core_req_writeonly_unqual = ~(| core_req_read_mask_unqual);
VX_elastic_buffer #(
.DATAW (NUM_BANKS * (1 + 1 + `LINE_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS)),
.SIZE (CREQ_SIZE),
.OUTPUT_REG (1) // output should be registered for the data_store addr port
.DATAW (NUM_BANKS * (1 + 1 + `LINE_ADDR_WIDTH + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS) + NUM_BANKS + 1),
.SIZE (CREQ_SIZE),
.OUT_REG (1) // output should be registered for the data_store addr port
) core_req_queue (
.clk (clk),
.reset (reset),
@@ -145,44 +137,53 @@ module VX_shared_mem #(
.valid_in (creq_in_valid),
.data_in ({per_bank_core_req_valid_unqual,
per_bank_core_req_rw_unqual,
per_bank_core_req_addr_qual,
per_bank_core_req_addr_unqual,
per_bank_core_req_byteen_unqual,
per_bank_core_req_data_unqual,
per_bank_core_req_tag_unqual,
per_bank_core_req_tid_unqual}),
per_bank_core_req_tid_unqual,
core_req_read_mask_unqual,
core_req_writeonly_unqual}),
.data_out ({per_bank_core_req_valid,
per_bank_core_req_rw,
per_bank_core_req_addr,
per_bank_core_req_byteen,
per_bank_core_req_data,
per_bank_core_req_tag,
per_bank_core_req_tid}),
per_bank_core_req_tid,
core_req_read_mask,
core_req_writeonly}),
.ready_out (creq_out_ready),
.valid_out (creq_out_valid)
);
`UNUSED_VAR (creq_in_fire)
wire crsq_in_valid, crsq_in_ready;
wire crsq_last_read;
assign creq_out_ready = core_req_writeonly
|| (crsq_in_ready && crsq_last_read);
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire wren = per_bank_core_req_rw[i]
&& per_bank_core_req_valid[i]
&& creq_out_fire;
wire [WORD_SIZE-1:0] wren = per_bank_core_req_byteen[i]
& {WORD_SIZE{per_bank_core_req_valid[i]
&& per_bank_core_req_rw[i]}};
wire [`LINE_SELECT_BITS-1:0] addr = per_bank_core_req_addr[i][`LINE_SELECT_BITS-1:0];
VX_sp_ram #(
.DATAW (`WORD_WIDTH),
.SIZE (`LINES_PER_BANK),
.BYTEENW (WORD_SIZE),
.RWCHECK (1)
.DATAW (`WORD_WIDTH),
.SIZE (`LINES_PER_BANK),
.BYTEENW (WORD_SIZE),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.addr (per_bank_core_req_addr[i]),
.wren (wren),
.byteen (per_bank_core_req_byteen[i]),
.rden (1'b1),
.din (per_bank_core_req_data[i]),
.dout (per_bank_core_rsp_data[i])
.clk (clk),
.addr (addr),
.wren (wren),
.wdata (per_bank_core_req_data[i]),
.rdata (per_bank_core_rsp_data[i])
);
end
@@ -190,57 +191,54 @@ module VX_shared_mem #(
// We first need to select the current tag to process,
// then send all bank responses for that tag as a batch
wire crsq_in_valid, crsq_in_ready;
reg [NUM_BANKS-1:0] bank_rsp_sel_prv, bank_rsp_sel_cur;
wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel_prv | bank_rsp_sel_cur;
reg [NUM_REQS-1:0] core_rsp_valids_in;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
wire [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
reg [NUM_BANKS-1:0] bank_rsp_sel_r, bank_rsp_sel_n;
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_req_reads);
assign crsq_last_read = (bank_rsp_sel_n == core_req_read_mask);
always @(posedge clk) begin
if (reset) begin
bank_rsp_sel_prv <= 0;
bank_rsp_sel_r <= 0;
end else begin
if (crsq_in_fire) begin
if (bank_rsp_sel_n == per_bank_req_reads) begin
bank_rsp_sel_prv <= 0;
if (crsq_last_read) begin
bank_rsp_sel_r <= 0;
end else begin
bank_rsp_sel_prv <= bank_rsp_sel_n;
bank_rsp_sel_r <= bank_rsp_sel_n;
end
end
end
end
reg [NUM_REQS-1:0] core_rsp_valids_in;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
always @(*) begin
end
VX_find_first #(
.N (NUM_BANKS),
.DATAW (CORE_TAG_WIDTH)
) find_first (
.valid_i (core_req_read_mask & ~bank_rsp_sel_r),
.data_i (per_bank_core_req_tag),
.data_o (core_rsp_tag_in),
`UNUSED_PIN (valid_o)
);
always @(*) begin
core_rsp_valids_in = 0;
core_rsp_data_in = 'x;
core_rsp_tag_in = 'x;
bank_rsp_sel_cur = 0;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_req_reads[i] && ~bank_rsp_sel_prv[i]) begin
core_rsp_tag_in = per_bank_core_req_tag[i];
end
end
bank_rsp_sel_n = bank_rsp_sel_r;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_req_valid[i]
&& (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
bank_rsp_sel_cur[i] = 1;
bank_rsp_sel_n[i] = 1;
end
end
end
assign crsq_in_valid = creq_out_valid && per_bank_req_has_reads;
assign crsq_in_valid = creq_out_valid && ~core_req_writeonly;
VX_elastic_buffer #(
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
@@ -257,10 +255,10 @@ module VX_shared_mem #(
);
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
`IGNORE_UNUSED_BEGIN
wire [NUM_BANKS-1:0][31:0] debug_pc_st0, debug_pc_st1;
wire [NUM_BANKS-1:0][`NW_BITS-1:0] debug_wid_st0, debug_wid_st1;
`IGNORE_WARNINGS_END
`IGNORE_UNUSED_END
for (genvar i = 0; i < NUM_BANKS; ++i) begin
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
@@ -276,17 +274,21 @@ module VX_shared_mem #(
`ifdef DBG_PRINT_CACHE_BANK
reg is_multi_tag_req;
`IGNORE_WARNINGS_BEGIN
`IGNORE_UNUSED_BEGIN
reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel;
`IGNORE_WARNINGS_END
`IGNORE_UNUSED_END
always @(*) begin
core_req_tag_sel ='x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_req_valid[i]) begin
core_req_tag_sel = per_bank_core_req_tag[i];
end
end
VX_find_first #(
.N (NUM_BANKS),
.DATAW (CORE_TAG_WIDTH)
) find_first_d (
.valid_i (per_bank_core_req_valid),
.data_i (per_bank_core_req_tag),
.data_o (core_req_tag_sel),
`UNUSED_PIN (valid_o)
);
always @(*) begin
is_multi_tag_req = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]
@@ -298,22 +300,20 @@ module VX_shared_mem #(
always @(posedge clk) begin
if (!crsq_in_ready) begin
$display("%t: *** cache%0d pipeline-stall", $time, CACHE_ID);
dpi_trace("%d: *** cache%0d pipeline-stall\n", $time, CACHE_ID);
end
if (is_multi_tag_req) begin
$display("%t: *** cache%0d multi-tag request!", $time, CACHE_ID);
dpi_trace("%d: *** cache%0d multi-tag request!\n", $time, CACHE_ID);
end
if (creq_in_fire) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_unqual[i]) begin
if (per_bank_core_req_rw_unqual[i]) begin
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i],
debug_wid_st0[i], debug_pc_st0[i]);
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i], debug_wid_st0[i], debug_pc_st0[i]);
end else begin
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i],
debug_wid_st0[i], debug_pc_st0[i]);
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], debug_wid_st0[i], debug_pc_st0[i]);
end
end
end
@@ -322,13 +322,11 @@ module VX_shared_mem #(
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]) begin
if (per_bank_core_req_rw[i]) begin
$display("%t: cache%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_req_data[i],
debug_wid_st1[i], debug_pc_st1[i]);
dpi_trace("%d: cache%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_req_data[i], debug_wid_st1[i], debug_pc_st1[i]);
end else begin
$display("%t: cache%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_rsp_data[i],
debug_wid_st1[i], debug_pc_st1[i]);
dpi_trace("%d: cache%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_rsp_data[i], debug_wid_st1[i], debug_pc_st1[i]);
end
end
end
@@ -338,16 +336,22 @@ module VX_shared_mem #(
`ifdef PERF_ENABLE
// per cycle: core_reads, core_writes
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
if (CORE_TAG_ID_BITS != 0) begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end else begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end
reg [`PERF_CTR_BITS-1:0] perf_core_reads;

82
hw/rtl/cache/VX_tag_access.sv vendored Normal file
View File

@@ -0,0 +1,82 @@
`include "VX_cache_define.vh"
// Tag store for one cache bank.
//
// The incoming line address is split into a set index (low bits) and a tag.
// The index addresses a single-port RAM holding {valid, tag} per line.
// A fill writes {1, tag}; a flush writes {0, tag} (flush wins when both are
// asserted because the valid bit is !flush). tag_match reports whether the
// word read from the RAM is valid and carries the same tag as the request.
module VX_tag_access #(
    parameter CACHE_ID         = 0,
    parameter BANK_ID          = 0,

    // Cache capacity in bytes
    parameter CACHE_SIZE       = 1,
    // Bytes per line inside a bank
    parameter CACHE_LINE_SIZE  = 1,
    // How many banks the cache has
    parameter NUM_BANKS        = 1,
    // Bytes per word
    parameter WORD_SIZE        = 1,
    // Bank offset from the beginning of the index range
    parameter BANK_ADDR_OFFSET = 0
) (
    input wire                        clk,
    input wire                        reset,

`ifdef DBG_CACHE_REQ_INFO
`IGNORE_UNUSED_BEGIN
    input wire [31:0]                 debug_pc,
    input wire [`NW_BITS-1:0]         debug_wid,
`IGNORE_UNUSED_END
`endif

    // only consumed by the debug trace below
    input wire                        stall,

    // lookup / fill / flush request
    input wire                        lookup,
    input wire [`LINE_ADDR_WIDTH-1:0] addr,
    input wire                        fill,
    input wire                        flush,
    output wire                       tag_match
);
    `UNUSED_PARAM (CACHE_ID)
    `UNUSED_PARAM (BANK_ID)
    `UNUSED_VAR (reset)
    `UNUSED_VAR (lookup)
    `UNUSED_VAR (stall)

    // request address decomposition
    wire [`LINE_SELECT_BITS-1:0] set_index;
    wire [`TAG_SELECT_BITS-1:0]  req_tag;

    assign set_index = addr[`LINE_SELECT_BITS-1:0];
    assign req_tag   = `LINE_TAG_ADDR(addr);

    // contents read back from the tag RAM
    wire                        stored_valid;
    wire [`TAG_SELECT_BITS-1:0] stored_tag;

    // both fill and flush update the addressed line
    wire tag_wren = fill || flush;

    VX_sp_ram #(
        .DATAW      (`TAG_SELECT_BITS + 1),
        .SIZE       (`LINES_PER_BANK),
        .NO_RWCHECK (1)
    ) tag_store (
        .clk   (clk),
        .addr  (set_index),
        .wren  (tag_wren),
        .wdata ({!flush, req_tag}),
        .rdata ({stored_valid, stored_tag})
    );

    assign tag_match = (req_tag == stored_tag) && stored_valid;

`ifdef DBG_PRINT_CACHE_TAG
    // Simulation trace of fills, flushes and lookup outcomes.
    always @(posedge clk) begin
        if (fill && ~stall) begin
            dpi_trace("%d: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), set_index, req_tag);
        end
        if (flush) begin
            dpi_trace("%d: cache%0d:%0d tag-flush: addr=%0h, blk_addr=%0d\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), set_index);
        end
        if (lookup && ~stall) begin
            if (tag_match) begin
                dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, set_index, req_tag);
            end else begin
                dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, set_index, req_tag, stored_tag);
            end
        end
    end
`endif

endmodule

View File

@@ -1,84 +0,0 @@
`include "VX_cache_define.vh"
// Tag store for one cache bank (variant with a single 'fill' write strobe).
//
// The incoming line address is split into a set index (low bits) and a tag.
// The index addresses a single-port RAM holding {valid, tag} per line.
// A write happens whenever 'fill' is asserted; is_flush selects whether the
// line is written as valid (is_flush = 0) or invalid (is_flush = 1).
// tag_match reports whether the word read from the RAM is valid and carries
// the same tag as the request.
module VX_tag_access #(
    parameter CACHE_ID         = 0,
    parameter BANK_ID          = 0,

    // Cache capacity in bytes
    parameter CACHE_SIZE       = 1,
    // Bytes per line inside a bank
    parameter CACHE_LINE_SIZE  = 1,
    // How many banks the cache has
    parameter NUM_BANKS        = 1,
    // Bytes per word
    parameter WORD_SIZE        = 1,
    // Bank offset from the beginning of the index range
    parameter BANK_ADDR_OFFSET = 0
) (
    input wire                        clk,
    input wire                        reset,

`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
    input wire [31:0]                 debug_pc,
    input wire [`NW_BITS-1:0]         debug_wid,
`IGNORE_WARNINGS_END
`endif

    // lookup / fill request
    input wire                        lookup,
    input wire [`LINE_ADDR_WIDTH-1:0] addr,
    input wire                        fill,
    input wire                        is_flush,
    output wire                       tag_match
);
    `UNUSED_PARAM (CACHE_ID)
    `UNUSED_PARAM (BANK_ID)
    `UNUSED_VAR (reset)
    `UNUSED_VAR (lookup)

    // request address decomposition
    wire [`LINE_SELECT_BITS-1:0] set_index;
    wire [`TAG_SELECT_BITS-1:0]  req_tag;

    assign set_index = addr [`LINE_SELECT_BITS-1:0];
    assign req_tag   = `LINE_TAG_ADDR(addr);

    // contents read back from the tag RAM
    wire                        stored_valid;
    wire [`TAG_SELECT_BITS-1:0] stored_tag;

    VX_sp_ram #(
        .DATAW    (`TAG_SELECT_BITS + 1),
        .SIZE     (`LINES_PER_BANK),
        .INITZERO (1),
        .RWCHECK  (1)
    ) tag_store (
        .clk    (clk),
        .addr   (set_index),
        .wren   (fill),
        .byteen (1'b1),
        .rden   (1'b1),
        .din    ({!is_flush, req_tag}),
        .dout   ({stored_valid, stored_tag})
    );

    assign tag_match = (req_tag == stored_tag) && stored_valid;

`ifdef DBG_PRINT_CACHE_TAG
    // Simulation trace: a fill (or flush) takes precedence over a lookup.
    always @(posedge clk) begin
        if (fill && is_flush) begin
            $display("%t: cache%0d:%0d tag-flush: addr=%0h, blk_addr=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), set_index);
        end else if (fill) begin
            $display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), set_index, req_tag, stored_tag);
            // a fill whose tag is already present and valid is reported
            if (tag_match) begin
                $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr, BANK_ID));
            end
        end else if (lookup) begin
            if (tag_match) begin
                $display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, set_index, req_tag);
            end else begin
                $display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, set_index, req_tag, stored_tag);
            end
        end
    end
`endif

endmodule