Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactory

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor udpate

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit c1e168fdbe
1309 changed files with 247412 additions and 311463 deletions

View File

@@ -1,511 +0,0 @@
`include "VX_cache_define.vh"
module VX_bank #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Size of cache in bytes
parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of bankS
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Core Request Queue Size
parameter CREQ_SIZE = 1,
// Core Response Queue Size
parameter CRSQ_SIZE = 1,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 1,
// Memory Request Queue Size
parameter MREQ_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0,
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
`SCOPE_IO_VX_bank
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output wire perf_read_misses,
output wire perf_write_misses,
output wire perf_mshr_stalls,
`endif
// Core Request
input wire core_req_valid,
input wire [NUM_PORTS-1:0] core_req_pmask,
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] core_req_wsel,
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_req_tid,
input wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
input wire core_req_rw,
input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr,
output wire core_req_ready,
// Core Response
output wire core_rsp_valid,
output wire [NUM_PORTS-1:0] core_rsp_pmask,
output wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_rsp_tid,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [NUM_PORTS-1:0] mem_req_pmask,
output wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen,
output wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel,
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
output wire mem_rsp_ready,
// flush
input wire flush_enable,
input wire [`LINE_SELECT_BITS-1:0] flush_addr
);
`IGNORE_UNUSED_BEGIN
wire [`DBG_CACHE_REQ_IDW-1:0] req_id_sel, req_id_st0, req_id_st1;
`IGNORE_UNUSED_END
wire [NUM_PORTS-1:0] creq_pmask;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] creq_wsel;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] creq_tid;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] creq_tag;
wire creq_rw;
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
wire creq_valid, creq_ready;
VX_elastic_buffer #(
.DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH)),
.SIZE (CREQ_SIZE)
) core_req_queue (
.clk (clk),
.reset (reset),
.ready_in (core_req_ready),
.valid_in (core_req_valid),
.data_in ({core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid, core_req_tag}),
.data_out ({creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid, creq_tag}),
.ready_out (creq_ready),
.valid_out (creq_valid)
);
wire mreq_alm_full;
wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
wire crsq_valid, crsq_ready;
wire crsq_stall;
wire mshr_valid;
wire mshr_ready;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id;
wire mshr_alm_full;
wire [MSHR_ADDR_WIDTH-1:0] mshr_dequeue_id;
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] mshr_tag;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mshr_wsel;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] mshr_tid;
wire [NUM_PORTS-1:0] mshr_pmask;
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
wire is_read_st0, is_read_st1;
wire is_write_st0, is_write_st1;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel_st0, wsel_st1;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st1;
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
wire valid_st0, valid_st1;
wire is_fill_st0, is_fill_st1;
wire is_mshr_st0, is_mshr_st1;
wire miss_st0, miss_st1;
wire is_flush_st0;
wire mshr_pending_st0, mshr_pending_st1;
// prevent read-during-write hazard when accessing tags/data block RAMs
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
wire rdw_write_hazard = valid_st0 && is_write_st0 && ~creq_rw;
// determine which queue to pop next in priority order
wire mshr_grant = !flush_enable;
wire mshr_enable = mshr_grant && mshr_valid;
wire mrsq_grant = !flush_enable && !mshr_enable;
wire mrsq_enable = mrsq_grant && mem_rsp_valid;
wire creq_grant = !flush_enable && !mshr_enable && !mrsq_enable;
wire creq_enable = creq_grant && creq_valid;
assign mshr_ready = mshr_grant
&& !rdw_fill_hazard // prevent read-during-write hazard
&& !crsq_stall; // ensure core_rsp_queue not full
assign mem_rsp_ready = mrsq_grant
&& !crsq_stall; // ensure core_rsp_queue not full
assign creq_ready = creq_grant
&& !rdw_write_hazard // prevent read-during-write hazard
&& !mreq_alm_full // ensure mem_req_queue not full
&& !mshr_alm_full // ensure mshr not full
&& !crsq_stall; // ensure core_rsp_queue not full
wire flush_fire = flush_enable;
wire mshr_fire = mshr_valid && mshr_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire creq_fire = creq_valid && creq_ready;
assign req_id_sel = mshr_enable ? mshr_tag[0][`CACHE_REQ_ID_RNG] : creq_tag[0][`CACHE_REQ_ID_RNG];
wire [`CACHE_LINE_WIDTH-1:0] wdata_sel;
assign wdata_sel[(NUM_PORTS * `WORD_WIDTH)-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[(NUM_PORTS * `WORD_WIDTH)-1:0] : creq_data;
for (genvar i = NUM_PORTS * `WORD_WIDTH; i < `CACHE_LINE_WIDTH; ++i) begin
assign wdata_sel[i] = mem_rsp_data[i];
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (!crsq_stall),
.data_in ({
flush_fire || mshr_fire || mem_rsp_fire || creq_fire,
flush_enable,
mshr_enable,
mrsq_enable,
creq_enable && ~creq_rw,
creq_enable && creq_rw,
flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : (mshr_valid ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : creq_addr)),
wdata_sel,
mshr_valid ? mshr_wsel : creq_wsel,
creq_byteen,
mshr_valid ? mshr_tid : creq_tid,
mshr_valid ? mshr_pmask : creq_pmask,
mshr_valid ? mshr_tag : creq_tag,
mshr_valid ? mshr_dequeue_id : mem_rsp_id
}),
.data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
);
assign req_id_st0 = tag_st0[0][`CACHE_REQ_ID_RNG];
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_flush_st0);
wire tag_match_st0;
VX_tag_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
) tag_access (
.clk (clk),
.reset (reset),
.req_id (req_id_st0),
.stall (crsq_stall),
// read/Fill
.lookup (do_lookup_st0),
.addr (addr_st0),
.fill (do_fill_st0),
.flush (do_flush_st0),
.tag_match (tag_match_st0)
);
// we have a core request hit
assign miss_st0 = (is_read_st0 || is_write_st0) && ~tag_match_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_a_st0 = (is_read_st0 || is_write_st0) ? mshr_alloc_id : mshr_id_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (!crsq_stall),
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, miss_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_a_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, miss_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
);
assign req_id_st1 = tag_st1[0][`CACHE_REQ_ID_RNG];
wire do_read_st0 = valid_st0 && is_read_st0;
wire do_read_st1 = valid_st1 && is_read_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_write_st1 = valid_st1 && is_write_st1;
wire do_mshr_st1 = valid_st1 && is_mshr_st1;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data_st1 = wdata_st1[0 +: NUM_PORTS * `WORD_WIDTH];
`UNUSED_VAR (wdata_st1)
VX_data_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE(CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (NUM_PORTS),
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE)
) data_access (
.clk (clk),
.reset (reset),
.req_id (req_id_st1),
.stall (crsq_stall),
.read (do_read_st1 || do_mshr_st1),
.fill (do_fill_st1),
.write (do_write_st1 && !miss_st1),
.addr (addr_st1),
.wsel (wsel_st1),
.pmask (pmask_st1),
.byteen (byteen_st1),
.fill_data (wdata_st1),
.write_data (creq_data_st1),
.read_data (rdata_st1)
);
wire mshr_allocate = do_read_st0 && !crsq_stall;
wire mshr_replay = do_fill_st0 && !crsq_stall;
wire mshr_lookup = mshr_allocate;
wire mshr_release = do_read_st1 && !miss_st1 && !crsq_stall;
VX_pending_size #(
.SIZE (MSHR_SIZE)
) mshr_pending_size (
.clk (clk),
.reset (reset),
.incr (creq_fire && ~creq_rw),
.decr (mshr_fire || mshr_release),
.full (mshr_alm_full),
`UNUSED_PIN (size),
`UNUSED_PIN (empty)
);
VX_miss_resrv #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (NUM_PORTS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.MSHR_SIZE (MSHR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
) miss_resrv (
.clk (clk),
.reset (reset),
.deq_req_id (req_id_sel),
.lkp_req_id (req_id_st0),
.rel_req_id (req_id_st1),
// allocate
.allocate_valid (mshr_allocate),
.allocate_addr (addr_st0),
.allocate_data ({wsel_st0, tag_st0, req_tid_st0, pmask_st0}),
.allocate_id (mshr_alloc_id),
`UNUSED_PIN (allocate_ready),
// lookup
.lookup_valid (mshr_lookup),
.lookup_replay (mshr_replay),
.lookup_id (mshr_alloc_id),
.lookup_addr (addr_st0),
.lookup_match (mshr_pending_st0),
// fill
.fill_valid (mem_rsp_fire),
.fill_id (mem_rsp_id),
.fill_addr (mem_rsp_addr),
// dequeue
.dequeue_valid (mshr_valid),
.dequeue_id (mshr_dequeue_id),
.dequeue_addr (mshr_addr),
.dequeue_data ({mshr_wsel, mshr_tag, mshr_tid, mshr_pmask}),
.dequeue_ready (mshr_ready),
// release
.release_valid (mshr_release),
.release_id (mshr_id_st1)
);
// Enqueue core response
wire [NUM_PORTS-1:0] crsq_pmask;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] crsq_tag;
assign crsq_valid = (do_read_st1 && !miss_st1)
|| do_mshr_st1;
assign crsq_stall = crsq_valid && !crsq_ready;
assign crsq_pmask = pmask_st1;
assign crsq_tid = req_tid_st1;
assign crsq_data = rdata_st1;
assign crsq_tag = tag_st1;
VX_elastic_buffer #(
.DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)),
.SIZE (CRSQ_SIZE),
.OUT_REG (1)
) core_rsp_req (
.clk (clk),
.reset (reset),
.valid_in (crsq_valid),
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
.ready_in (crsq_ready),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
.ready_out (core_rsp_ready)
);
// Enqueue memory request
wire mreq_push, mreq_pop, mreq_empty;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mreq_wsel;
wire [NUM_PORTS-1:0] mreq_pmask;
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
wire mreq_rw;
assign mreq_push = (do_read_st1 && miss_st1 && !mshr_pending_st1)
|| do_write_st1;
assign mreq_pop = mem_req_valid && mem_req_ready;
assign mreq_rw = WRITE_ENABLE && is_write_st1;
assign mreq_addr = addr_st1;
assign mreq_id = mshr_id_st1;
assign mreq_pmask= pmask_st1;
assign mreq_wsel = wsel_st1;
assign mreq_byteen = byteen_st1;
assign mreq_data = creq_data_st1;
VX_fifo_queue #(
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
.SIZE (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-2),
.OUT_REG (1 == NUM_BANKS)
) mem_req_queue (
.clk (clk),
.reset (reset),
.push (mreq_push),
.pop (mreq_pop),
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_pmask, mreq_byteen, mreq_wsel, mreq_data}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_pmask, mem_req_byteen, mem_req_wsel, mem_req_data}),
.empty (mreq_empty),
.alm_full (mreq_alm_full),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
);
assign mem_req_valid = !mreq_empty;
///////////////////////////////////////////////////////////////////////////////
`SCOPE_ASSIGN (valid_st0, valid_st0);
`SCOPE_ASSIGN (valid_st1, valid_st1);
`SCOPE_ASSIGN (is_fill_st0, is_fill_st0);
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
`SCOPE_ASSIGN (miss_st0, miss_st0);
`SCOPE_ASSIGN (crsq_stall, crsq_stall);
`SCOPE_ASSIGN (mreq_alm_full, mreq_alm_full);
`SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full);
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`ifdef PERF_ENABLE
assign perf_read_misses = do_read_st1 && miss_st1;
assign perf_write_misses = do_write_st1 && miss_st1;
assign perf_mshr_stalls = mshr_alm_full;
`endif
`ifdef DBG_TRACE_CACHE_BANK
wire crsq_fire = crsq_valid && crsq_ready;
wire pipeline_stall = (mshr_valid || mem_rsp_valid || creq_valid)
&& ~(mshr_fire || mem_rsp_fire || creq_fire);
always @(posedge clk) begin
if (pipeline_stall) begin
dpi_trace("%d: *** cache%0d:%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, CACHE_ID, BANK_ID, crsq_stall, mreq_alm_full, mshr_alm_full);
end
if (flush_enable) begin
dpi_trace("%d: cache%0d:%0d flush: addr=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
end
if (mem_rsp_fire) begin
dpi_trace("%d: cache%0d:%0d fill-rsp: addr=%0h, id=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data);
end
if (mshr_fire) begin
dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, req_id_sel);
end
if (creq_fire) begin
if (creq_rw)
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, req_id_sel);
else
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, req_id_sel);
end
if (crsq_fire) begin
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, req_id_st1);
end
if (mreq_push) begin
if (is_write_st1)
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, req_id_st1);
else
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, req_id_st1);
end
end
`endif
endmodule

1054
hw/rtl/cache/VX_cache.sv vendored

File diff suppressed because it is too large Load Diff

549
hw/rtl/cache/VX_cache_bank.sv vendored Normal file
View File

@@ -0,0 +1,549 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_bank #(
parameter `STRING INSTANCE_ID= "",
parameter BANK_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 1,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 1,
// Memory Request Queue Size
parameter MREQ_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// Core response output register
parameter CORE_OUT_REG = 0,
// Memory request output register
parameter MEM_OUT_REG = 0,
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE),
parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS),
parameter WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS)
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output wire perf_read_misses,
output wire perf_write_misses,
output wire perf_mshr_stalls,
`endif
// Core Request
input wire core_req_valid,
input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr,
input wire core_req_rw,
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel,
input wire [WORD_SIZE-1:0] core_req_byteen,
input wire [`CS_WORD_WIDTH-1:0] core_req_data,
input wire [TAG_WIDTH-1:0] core_req_tag,
input wire [REQ_SEL_WIDTH-1:0] core_req_idx,
output wire core_req_ready,
// Core Response
output wire core_rsp_valid,
output wire [`CS_WORD_WIDTH-1:0] core_rsp_data,
output wire [TAG_WIDTH-1:0] core_rsp_tag,
output wire [REQ_SEL_WIDTH-1:0] core_rsp_idx,
input wire core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr,
output wire mem_req_rw,
output wire [WORD_SEL_WIDTH-1:0] mem_req_wsel,
output wire [WORD_SIZE-1:0] mem_req_byteen,
output wire [`CS_WORD_WIDTH-1:0] mem_req_data,
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
output wire mem_rsp_ready,
// initialization
input wire init_enable,
input wire [`CS_LINE_SEL_BITS-1:0] init_line_sel
);
`IGNORE_UNUSED_BEGIN
wire [`UP(UUID_WIDTH)-1:0] req_uuid_sel, req_uuid_st0, req_uuid_st1;
`IGNORE_UNUSED_END
wire crsq_stall;
wire mshr_alm_full;
wire mreq_alm_full;
wire [`CS_LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
wire replay_valid;
wire [`CS_LINE_ADDR_WIDTH-1:0] replay_addr;
wire replay_rw;
wire [WORD_SEL_WIDTH-1:0] replay_wsel;
wire [WORD_SIZE-1:0] replay_byteen;
wire [`CS_WORD_WIDTH-1:0] replay_data;
wire [TAG_WIDTH-1:0] replay_tag;
wire [REQ_SEL_WIDTH-1:0] replay_idx;
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
wire replay_ready;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
wire rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1;
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1;
wire [TAG_WIDTH-1:0] tag_st0, tag_st1;
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
wire valid_sel, valid_st0, valid_st1;
wire is_init_st0;
wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1;
wire is_replay_st0, is_replay_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_tail_st0, mshr_tail_st1;
wire mshr_pending_st0, mshr_pending_st1;
wire rdw_hazard_st0;
reg rdw_hazard_st1;
wire pipe_stall = crsq_stall || rdw_hazard_st1;
// inputs arbitration:
// mshr replay has highest priority to maximize utilization since there is no miss.
// handle memory responses next to prevent deadlock with potential memory request from a miss.
wire replay_grant = ~init_enable;
wire replay_enable = replay_grant && replay_valid;
wire fill_grant = ~init_enable && ~replay_enable;
wire fill_enable = fill_grant && mem_rsp_valid;
wire creq_grant = ~init_enable && ~replay_enable && ~fill_enable;
wire creq_enable = creq_grant && core_req_valid;
assign replay_ready = replay_grant
&& ~rdw_hazard_st0
&& ~pipe_stall;
assign mem_rsp_ready = fill_grant
&& ~pipe_stall;
assign core_req_ready = creq_grant
&& ~mreq_alm_full
&& ~mshr_alm_full
&& ~pipe_stall;
wire init_fire = init_enable;
wire replay_fire = replay_valid && replay_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire core_req_fire = core_req_valid && core_req_ready;
wire [TAG_WIDTH-1:0] mshr_creq_tag = replay_enable ? replay_tag : core_req_tag;
if (UUID_WIDTH != 0) begin
assign req_uuid_sel = mshr_creq_tag[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign req_uuid_sel = 0;
end
`UNUSED_VAR (mshr_creq_tag)
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || core_req_fire;
assign addr_sel = init_enable ? `CS_LINE_ADDR_WIDTH'(init_line_sel) :
(replay_valid ? replay_addr :
(mem_rsp_valid ? mem_rsp_addr : core_req_addr));
assign data_sel[`CS_WORD_WIDTH-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : (replay_valid ? replay_data : core_req_data);
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
assign data_sel[i] = mem_rsp_data[i];
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({
valid_sel,
init_enable,
replay_enable,
fill_enable,
creq_enable,
addr_sel,
data_sel,
replay_valid ? replay_rw : core_req_rw,
replay_valid ? replay_byteen : core_req_byteen,
replay_valid ? replay_wsel : core_req_wsel,
replay_valid ? replay_idx : core_req_idx,
replay_valid ? replay_tag : core_req_tag,
replay_id
}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_creq_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
);
if (UUID_WIDTH != 0) begin
assign req_uuid_st0 = tag_st0[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign req_uuid_st0 = 0;
end
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_init_st0 = valid_st0 && is_init_st0;
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1;
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
`RESET_RELAY (tag_reset, reset);
VX_cache_tags #(
.INSTANCE_ID(INSTANCE_ID),
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.UUID_WIDTH (UUID_WIDTH)
) cache_tags (
.clk (clk),
.reset (tag_reset),
.req_uuid (req_uuid_st0),
.stall (pipe_stall),
// read/Fill
.lookup (do_lookup_st0),
.line_addr (addr_st0),
.fill (do_fill_st0),
.init (do_init_st0),
.way_sel (way_sel_st0),
.tag_matches(tag_matches_st0)
);
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_st0, is_replay_st0, is_fill_st0, is_creq_st0, rw_st0, addr_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_tail_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_replay_st1, is_fill_st1, is_creq_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_tail_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
);
// we have a tag hit
wire is_hit_st1 = (| tag_matches_st1);
if (UUID_WIDTH != 0) begin
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign req_uuid_st1 = 0;
end
wire do_creq_rd_st1 = valid_st1 && is_creq_st1 && ~rw_st1;
wire do_creq_wr_st1 = valid_st1 && is_creq_st1 && rw_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
`UNUSED_VAR (do_write_miss_st1)
// ensure mshr replay always get a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
// detect BRAM's read-during-write hazard
assign rdw_hazard_st0 = do_fill_st0; // after a fill
always @(posedge clk) begin
rdw_hazard_st1 <= (do_creq_rd_st0 && do_write_hit_st1 && (addr_st0 == addr_st1))
&& ~rdw_hazard_st1; // after a write to same address
end
wire [`CS_WORD_WIDTH-1:0] write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
`RESET_RELAY (data_reset, reset);
VX_cache_data #(
.INSTANCE_ID (INSTANCE_ID),
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.UUID_WIDTH (UUID_WIDTH)
) cache_data (
.clk (clk),
.reset (data_reset),
.req_uuid (req_uuid_st1),
.stall (pipe_stall),
.read (do_read_hit_st1 || do_replay_rd_st1),
.fill (do_fill_st1),
.write (do_write_hit_st1 || do_replay_wr_st1),
.way_sel (way_sel_st1 | tag_matches_st1),
.line_addr (addr_st1),
.wsel (wsel_st1),
.byteen (byteen_st1),
.fill_data (fill_data_st1),
.write_data (write_data_st1),
.read_data (read_data_st1)
);
wire [MSHR_SIZE-1:0] mshr_matches_st0;
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
wire mshr_lookup_st0 = mshr_allocate_st0;
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
wire mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
VX_pending_size #(
.SIZE (MSHR_SIZE)
) mshr_pending_size (
.clk (clk),
.reset (reset),
.incr (core_req_fire),
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
.full (mshr_alm_full),
`UNUSED_PIN (size),
`UNUSED_PIN (empty)
);
`RESET_RELAY (mshr_reset, reset);
VX_cache_mshr #(
.INSTANCE_ID (INSTANCE_ID),
.BANK_ID (BANK_ID),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.MSHR_SIZE (MSHR_SIZE),
.UUID_WIDTH (UUID_WIDTH),
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
) cache_mshr (
.clk (clk),
.reset (mshr_reset),
.deq_req_uuid (req_uuid_sel),
.lkp_req_uuid (req_uuid_st0),
.fin_req_uuid (req_uuid_st1),
// memory fill
.fill_valid (mem_rsp_fire),
.fill_id (mem_rsp_id),
.fill_addr (mem_rsp_addr),
// dequeue
.dequeue_valid (replay_valid),
.dequeue_addr (replay_addr),
.dequeue_rw (replay_rw),
.dequeue_data ({replay_wsel, replay_byteen, replay_data, replay_tag, replay_idx}),
.dequeue_id (replay_id),
.dequeue_ready (replay_ready),
// allocate
.allocate_valid (mshr_allocate_st0),
.allocate_addr (addr_st0),
.allocate_rw (rw_st0),
.allocate_data ({wsel_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
.allocate_id (mshr_alloc_id_st0),
.allocate_tail (mshr_tail_st0),
`UNUSED_PIN (allocate_ready),
// lookup
.lookup_valid (mshr_lookup_st0),
.lookup_addr (addr_st0),
.lookup_matches (mshr_matches_st0),
// finalize
.finalize_valid (mshr_finalize_st1),
.finalize_release(mshr_release_st1),
.finalize_pending(mshr_pending_st1),
.finalize_id (mshr_id_st1),
.finalize_tail (mshr_tail_st1)
);
// ignore allocated id from mshr matches
wire [MSHR_SIZE-1:0] lookup_matches;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
assign lookup_matches[i] = (i != mshr_alloc_id_st0) && mshr_matches_st0[i];
end
assign mshr_pending_st0 = (| lookup_matches);
// schedule core response
wire crsq_valid, crsq_ready;
wire [`CS_WORD_WIDTH-1:0] crsq_data;
wire [REQ_SEL_WIDTH-1:0] crsq_idx;
wire [TAG_WIDTH-1:0] crsq_tag;
assign crsq_valid = do_read_hit_st1 || do_replay_rd_st1;
assign crsq_idx = req_idx_st1;
assign crsq_data = read_data_st1;
assign crsq_tag = tag_st1;
`RESET_RELAY (crsp_reset, reset);
VX_elastic_buffer #(
.DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH),
.SIZE (CRSQ_SIZE),
.OUT_REG (CORE_OUT_REG)
) core_rsp_queue (
.clk (clk),
.reset (crsp_reset),
.valid_in (crsq_valid && ~rdw_hazard_st1),
.ready_in (crsq_ready),
.data_in ({crsq_tag, crsq_data, crsq_idx}),
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
.valid_out (core_rsp_valid),
.ready_out (core_rsp_ready)
);
assign crsq_stall = crsq_valid && ~crsq_ready;
// schedule memory request
wire mreq_push, mreq_pop, mreq_empty;
wire [`CS_WORD_WIDTH-1:0] mreq_data;
wire [WORD_SIZE-1:0] mreq_byteen;
wire [WORD_SEL_WIDTH-1:0] mreq_wsel;
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_addr;
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
wire mreq_rw;
assign mreq_push = (do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1;
assign mreq_pop = mem_req_valid && mem_req_ready;
assign mreq_rw = WRITE_ENABLE && rw_st1;
assign mreq_addr = addr_st1;
assign mreq_id = mshr_id_st1;
assign mreq_wsel = wsel_st1;
assign mreq_byteen = byteen_st1;
assign mreq_data = write_data_st1;
`RESET_RELAY (mreq_reset, reset);
VX_fifo_queue #(
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH),
.DEPTH (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-2),
.OUT_REG (MEM_OUT_REG)
) mem_req_queue (
.clk (clk),
.reset (mreq_reset),
.push (mreq_push),
.pop (mreq_pop),
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_byteen, mreq_wsel, mreq_data}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}),
.empty (mreq_empty),
.alm_full (mreq_alm_full),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
);
assign mem_req_valid = ~mreq_empty;
///////////////////////////////////////////////////////////////////////////////
`ifdef PERF_ENABLE
assign perf_read_misses = do_read_miss_st1;
assign perf_write_misses = do_write_miss_st1;
assign perf_mshr_stalls = mshr_alm_full;
`endif
`ifdef DBG_TRACE_CACHE_BANK
wire crsq_fire = crsq_valid && crsq_ready;
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid)
&& ~(replay_fire || mem_rsp_fire || core_req_fire);
always @(posedge clk) begin
if (pipeline_stall) begin
`TRACE(3, ("%d: *** %s-bank%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsq_stall, mreq_alm_full, mshr_alm_full));
end
if (init_enable) begin
`TRACE(2, ("%d: %s-bank%0d init: addr=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(init_line_sel, BANK_ID)));
end
if (mem_rsp_fire) begin
`TRACE(2, ("%d: %s-bank%0d fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
end
if (replay_fire) begin
`TRACE(2, ("%d: %s-bank%0d mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel));
end
if (core_req_fire) begin
if (core_req_rw)
`TRACE(2, ("%d: %s-bank%0d core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel));
else
`TRACE(2, ("%d: %s-bank%0d core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel));
end
if (crsq_fire) begin
`TRACE(2, ("%d: %s-bank%0d core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_idx, crsq_data, req_uuid_st1));
end
if (mreq_push) begin
if (do_creq_wr_st1)
`TRACE(2, ("%d: %s-bank%0d writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_addr, BANK_ID), mreq_byteen, mreq_data, req_uuid_st1));
else
`TRACE(2, ("%d: %s-bank%0d fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_addr, BANK_ID), mreq_id, req_uuid_st1));
end
end
`endif
endmodule

348
hw/rtl/cache/VX_cache_bypass.sv vendored Normal file
View File

@@ -0,0 +1,348 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
module VX_cache_bypass #(
parameter NUM_REQS = 1,
parameter NC_TAG_BIT = 0,
parameter NC_ENABLE = 0,
parameter PASSTHRU = 0,
parameter CORE_ADDR_WIDTH = 1,
parameter CORE_DATA_SIZE = 1,
parameter CORE_TAG_IN_WIDTH = 1,
parameter MEM_ADDR_WIDTH = 1,
parameter MEM_DATA_SIZE = 1,
parameter MEM_TAG_IN_WIDTH = 1,
parameter MEM_TAG_OUT_WIDTH = 1,
parameter UUID_WIDTH = 0,
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
parameter CORE_TAG_OUT_WIDTH= CORE_TAG_IN_WIDTH - NC_ENABLE
) (
input wire clk,
input wire reset,
// Core request in
input wire [NUM_REQS-1:0] core_req_valid_in,
input wire [NUM_REQS-1:0] core_req_rw_in,
input wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_in,
input wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_in,
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_in,
input wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_req_tag_in,
output wire [NUM_REQS-1:0] core_req_ready_in,
// Core request out
output wire [NUM_REQS-1:0] core_req_valid_out,
output wire [NUM_REQS-1:0] core_req_rw_out,
output wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_out,
output wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_out,
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_out,
output wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_req_tag_out,
input wire [NUM_REQS-1:0] core_req_ready_out,
// Core response in
input wire [NUM_REQS-1:0] core_rsp_valid_in,
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in,
input wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_rsp_tag_in,
output wire [NUM_REQS-1:0] core_rsp_ready_in,
// Core response out
output wire [NUM_REQS-1:0] core_rsp_valid_out,
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out,
output wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out,
input wire [NUM_REQS-1:0] core_rsp_ready_out,
// Memory request in
input wire mem_req_valid_in,
input wire mem_req_rw_in,
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
input wire [MEM_DATA_SIZE-1:0] mem_req_byteen_in,
input wire [MEM_DATA_WIDTH-1:0] mem_req_data_in,
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
output wire mem_req_ready_in,
// Memory request out
output wire mem_req_valid_out,
output wire mem_req_rw_out,
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
output wire [MEM_DATA_SIZE-1:0] mem_req_byteen_out,
output wire [MEM_DATA_WIDTH-1:0] mem_req_data_out,
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
input wire mem_req_ready_out,
// Memory response in
input wire mem_rsp_valid_in,
input wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_in,
input wire [MEM_TAG_OUT_WIDTH-1:0] mem_rsp_tag_in,
output wire mem_rsp_ready_in,
// Memory response out
output wire mem_rsp_valid_out,
output wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_out,
output wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_out,
input wire mem_rsp_ready_out
);
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
localparam MUX_DATAW = CORE_TAG_IN_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1;
localparam WORDS_PER_LINE = MEM_DATA_SIZE / CORE_DATA_SIZE;
localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
localparam CORE_TAG_ID_BITS = CORE_TAG_IN_WIDTH - UUID_WIDTH;
localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS;
localparam MEM_TAG_OUT_NC_WIDTH = MEM_TAG_OUT_WIDTH - 1 + NC_ENABLE;
// core request handling
wire [NUM_REQS-1:0] core_req_valid_in_nc;
wire [NUM_REQS-1:0] core_req_nc_idxs;
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire core_req_nc_valid;
for (genvar i = 0; i < NUM_REQS; ++i) begin
if (PASSTHRU != 0) begin
assign core_req_nc_idxs[i] = 1'b1;
end else begin
assign core_req_nc_idxs[i] = core_req_tag_in[i][NC_TAG_BIT];
end
end
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs;
wire core_req_in_fire = | (core_req_valid_in & core_req_ready_in);
VX_generic_arbiter #(
.NUM_REQS (NUM_REQS),
.TYPE (PASSTHRU ? "R" : "P"),
.LOCK_ENABLE (1)
) req_arb (
.clk (clk),
.reset (reset),
.unlock (core_req_in_fire),
.requests (core_req_valid_in_nc),
.grant_index (core_req_nc_idx),
.grant_onehot (core_req_nc_sel),
.grant_valid (core_req_nc_valid)
);
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs;
assign core_req_rw_out = core_req_rw_in;
assign core_req_addr_out = core_req_addr_in;
assign core_req_byteen_out = core_req_byteen_in;
assign core_req_data_out = core_req_data_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_bits_remove #(
.N (CORE_TAG_IN_WIDTH),
.S (NC_ENABLE),
.POS (NC_TAG_BIT)
) core_req_tag_nc_remove (
.data_in (core_req_tag_in[i]),
.data_out (core_req_tag_out[i])
);
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i])
: core_req_ready_out[i];
end
// memory request handling
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
assign mem_req_ready_in = mem_req_ready_out;
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
wire core_req_rw_in_sel;
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
end
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_idx];
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_tag_in_sel[CORE_TAG_ID_BITS-1:0];
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[WSEL_BITS +: MEM_ADDR_WIDTH];
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
if (WORDS_PER_LINE > 1) begin
reg [WORDS_PER_LINE-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
wire [WSEL_BITS-1:0] req_wsel = core_req_addr_in_sel[WSEL_BITS-1:0];
always @(*) begin
mem_req_byteen_in_r = '0;
mem_req_byteen_in_r[req_wsel] = core_req_byteen_in_sel;
mem_req_data_in_r = 'x;
mem_req_data_in_r[req_wsel] = core_req_data_in_sel;
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
if (NUM_REQS > 1) begin
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
end else begin
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
end
end else begin
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
if (NUM_REQS > 1) begin
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id});
end else begin
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id});
end
end
wire [MEM_TAG_OUT_NC_WIDTH-1:0] mem_req_tag_bypass;
if (UUID_WIDTH != 0) begin
assign mem_req_tag_bypass = {core_req_tag_in_sel[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
end else begin
assign mem_req_tag_bypass = mem_req_tag_id_bypass;
end
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_bypass_nc;
wire [(MEM_TAG_IN_WIDTH + 1)-1:0] mem_req_tag_in_nc;
VX_bits_insert #(
.N (MEM_TAG_OUT_NC_WIDTH),
.S (NC_ENABLE ? 0 : 1),
.POS (NC_TAG_BIT)
) mem_req_tag_bypass_nc_insert (
.data_in (mem_req_tag_bypass),
.sel_in (1'b0),
.data_out (mem_req_tag_bypass_nc)
);
VX_bits_insert #(
.N (MEM_TAG_IN_WIDTH),
.POS (NC_TAG_BIT)
) mem_req_tag_in_nc_insert (
.data_in (mem_req_tag_in),
.sel_in (1'b0),
.data_out (mem_req_tag_in_nc)
);
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : mem_req_tag_bypass_nc;
// core response handling
wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_in_nc;
wire is_mem_rsp_nc;
if (PASSTHRU != 0) begin
assign is_mem_rsp_nc = mem_rsp_valid_in;
end else begin
assign is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_bits_insert #(
.N (CORE_TAG_OUT_WIDTH),
.S (NC_ENABLE),
.POS (NC_TAG_BIT)
) core_rsp_tag_in_nc_insert (
.data_in (core_rsp_tag_in[i]),
.sel_in ('0),
.data_out (core_rsp_tag_in_nc[i])
);
end
wire [MEM_TAG_OUT_NC_WIDTH-1:0] mem_rsp_tag_in_nc;
VX_bits_remove #(
.N (MEM_TAG_OUT_WIDTH),
.S (NC_ENABLE ? 0 : 1),
.POS (NC_TAG_BIT)
) mem_rsp_tag_in_nc_remove (
.data_in (mem_rsp_tag_in),
.data_out (mem_rsp_tag_in_nc)
);
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
if (NUM_REQS > 1) begin
assign rsp_idx = mem_rsp_tag_in_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
end else begin
assign rsp_idx = 1'b0;
end
reg [NUM_REQS-1:0] rsp_nc_valid_r;
always @(*) begin
rsp_nc_valid_r = '0;
rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc;
end
assign core_rsp_valid_out = core_rsp_valid_in | rsp_nc_valid_r;
assign core_rsp_ready_in = core_rsp_ready_out;
if (WORDS_PER_LINE > 1) begin
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_in_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ?
core_rsp_data_in[i] : mem_rsp_data_in[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end
end else begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
end
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
if (UUID_WIDTH != 0) begin
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in_nc[i] : {mem_rsp_tag_in_nc[MEM_TAG_OUT_NC_WIDTH-1 -: UUID_WIDTH], mem_rsp_tag_in_nc[CORE_TAG_ID_BITS-1:0]};
end else begin
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in_nc[i] : mem_rsp_tag_in_nc[CORE_TAG_ID_BITS-1:0];
end
end
// memory response handling
if (PASSTHRU != 0) begin
assign mem_rsp_valid_out = 1'b0;
end else begin
assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT];
end
assign mem_rsp_data_out = mem_rsp_data_in;
VX_bits_remove #(
.N (MEM_TAG_IN_WIDTH + 1),
.POS (NC_TAG_BIT)
) mem_rsp_tag_out_remove (
.data_in (mem_rsp_tag_in[(MEM_TAG_IN_WIDTH + 1)-1:0]),
.data_out (mem_rsp_tag_out)
);
assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in[rsp_idx] && core_rsp_ready_out[rsp_idx]) : mem_rsp_ready_out;
endmodule

196
hw/rtl/cache/VX_cache_cluster.sv vendored Normal file
View File

@@ -0,0 +1,196 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_cluster import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_UNITS = 1,
parameter NUM_INPUTS = 1,
parameter TAG_SEL_IDX = 0,
// Number of requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0,
// Core response output register
parameter CORE_OUT_REG = 0,
// Memory request output register
parameter MEM_OUT_REG = 0
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
VX_mem_bus_if.slave core_bus_if [NUM_INPUTS * NUM_REQS],
VX_mem_bus_if.master mem_bus_if
);
localparam NUM_CACHES = `UP(NUM_UNITS);
localparam PASSTHRU = (NUM_UNITS == 0);
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
localparam MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
`ifdef PERF_ENABLE
cache_perf_t perf_cache_unit[NUM_CACHES];
`PERF_CACHE_REDUCE (cache_perf, perf_cache_unit, NUM_CACHES);
`endif
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH)
) cache_mem_bus_if[NUM_CACHES]();
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (ARB_TAG_WIDTH)
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH)
) core_bus_tmp_if[NUM_INPUTS]();
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (ARB_TAG_WIDTH)
) arb_core_bus_tmp_if[NUM_CACHES]();
for (genvar j = 0; j < NUM_INPUTS; ++j) begin
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
end
`RESET_RELAY (cache_arb_reset, reset);
VX_mem_arb #(
.NUM_INPUTS (NUM_INPUTS),
.NUM_OUTPUTS (NUM_CACHES),
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH),
.TAG_SEL_IDX (TAG_SEL_IDX),
.ARBITER ("R"),
.OUT_REG_REQ ((NUM_INPUTS != NUM_CACHES) ? 2 : 0),
.OUT_REG_RSP ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
) cache_arb (
.clk (clk),
.reset (cache_arb_reset),
.bus_in_if (core_bus_tmp_if),
.bus_out_if (arb_core_bus_tmp_if)
);
for (genvar k = 0; k < NUM_CACHES; ++k) begin
`ASSIGN_VX_MEM_BUS_IF (arb_core_bus_if[k * NUM_REQS + i], arb_core_bus_tmp_if[k]);
end
end
for (genvar i = 0; i < NUM_CACHES; ++i) begin
`RESET_RELAY (cache_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (ARB_TAG_WIDTH),
.CORE_OUT_REG ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_REG),
.MEM_OUT_REG ((NUM_CACHES > 1) ? 2 : MEM_OUT_REG),
.NC_ENABLE (NC_ENABLE),
.PASSTHRU (PASSTHRU)
) cache_wrap (
`ifdef PERF_ENABLE
.cache_perf (perf_cache_unit[i]),
`endif
.clk (clk),
.reset (cache_reset),
.core_bus_if (arb_core_bus_if[i * NUM_REQS +: NUM_REQS]),
.mem_bus_if (cache_mem_bus_if[i])
);
end
`RESET_RELAY (mem_arb_reset, reset);
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH + `ARB_SEL_BITS(NUM_CACHES, 1))
) mem_bus_tmp_if[1]();
VX_mem_arb #(
.NUM_INPUTS (NUM_CACHES),
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.ARBITER ("R"),
.OUT_REG_REQ ((NUM_CACHES > 1) ? 2 : 0),
.OUT_REG_RSP ((NUM_CACHES > 1) ? 2 : 0)
) mem_arb (
.clk (clk),
.reset (mem_arb_reset),
.bus_in_if (cache_mem_bus_if),
.bus_out_if (mem_bus_tmp_if)
);
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]);
endmodule

190
hw/rtl/cache/VX_cache_cluster_top.sv vendored Normal file
View File

@@ -0,0 +1,190 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_cluster_top import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_UNITS = 2,
parameter NUM_INPUTS = 4,
parameter TAG_SEL_IDX = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 4,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 16,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 1,
// Core response output register
parameter CORE_OUT_REG = 2,
// Memory request output register
parameter MEM_OUT_REG = 2,
parameter NUM_CACHES = `UP(NUM_UNITS),
parameter PASSTHRU = (NUM_UNITS == 0),
parameter ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES),
parameter MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)),
parameter MEM_TAG_X_WIDTH = MEM_TAG_WIDTH + `ARB_SEL_BITS(NUM_CACHES, 1)
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
// Core request
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_valid,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_rw,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_ready,
// Core response
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_valid,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_X_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_X_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready
);
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH)
) core_bus_if[NUM_INPUTS * NUM_REQS]();
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_X_WIDTH)
) mem_bus_if();
// Core request
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
for (genvar r = 0; r < NUM_REQS; ++r) begin
assign core_bus_if[i * NUM_REQS + r].req_valid = core_req_valid[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.rw = core_req_rw[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.byteen = core_req_byteen[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.addr = core_req_addr[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.data = core_req_data[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.tag = core_req_tag[i][r];
assign core_req_ready[i][r] = core_bus_if[i * NUM_REQS + r].req_ready;
end
end
// Core response
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
for (genvar r = 0; r < NUM_REQS; ++r) begin
assign core_rsp_valid[i][r] = core_bus_if[i * NUM_REQS + r].rsp_valid;
assign core_rsp_data[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.data;
assign core_rsp_tag[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.tag;
assign core_bus_if[i * NUM_REQS + r].rsp_ready = core_rsp_ready[i][r];
end
end
// Memory request
assign mem_req_valid = mem_bus_if.req_valid;
assign mem_req_rw = mem_bus_if.req_data.rw;
assign mem_req_byteen = mem_bus_if.req_data.byteen;
assign mem_req_addr = mem_bus_if.req_data.addr;
assign mem_req_data = mem_bus_if.req_data.data;
assign mem_req_tag = mem_bus_if.req_data.tag;
assign mem_bus_if.req_ready = mem_req_ready;
// Memory response
assign mem_bus_if.rsp_valid = mem_rsp_valid;
assign mem_bus_if.rsp_data.data = mem_rsp_data;
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_bus_if.rsp_ready;
VX_cache_cluster #(
.INSTANCE_ID (INSTANCE_ID),
.NUM_UNITS (NUM_UNITS),
.NUM_INPUTS (NUM_INPUTS),
.TAG_SEL_IDX (TAG_SEL_IDX),
.NUM_REQS (NUM_REQS),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.NC_ENABLE (NC_ENABLE),
.CORE_OUT_REG (CORE_OUT_REG),
.MEM_OUT_REG (MEM_OUT_REG)
) cache (
`ifdef PERF_ENABLE
.cache_perf (cache_perf),
`endif
.clk (clk),
.reset (reset),
.core_bus_if (core_bus_if),
.mem_bus_if (mem_bus_if)
);
endmodule

152
hw/rtl/cache/VX_cache_data.sv vendored Normal file
View File

@@ -0,0 +1,152 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_data #(
parameter `STRING INSTANCE_ID= "",
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0
) (
input wire clk,
input wire reset,
`IGNORE_UNUSED_BEGIN
input wire[`UP(UUID_WIDTH)-1:0] req_uuid,
`IGNORE_UNUSED_END
input wire stall,
input wire read,
input wire fill,
input wire write,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
input wire [WORD_SIZE-1:0] byteen,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
input wire [`CS_WORD_WIDTH-1:0] write_data,
input wire [NUM_WAYS-1:0] way_sel,
output wire [`CS_WORD_WIDTH-1:0] read_data
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (reset)
`UNUSED_VAR (line_addr)
`UNUSED_VAR (read)
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
wire [BYTEENW-1:0] wren;
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
always @(*) begin
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
wren_r = '0;
wren_r[wsel] = byteen;
end
// order the data layout to perform ways multiplexing last
// this allows performing onehot encoding of the way index in parallel with BRAM read.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i])
& {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}};
end
end
assign wren = wren_w;
end else begin
`UNUSED_VAR (write)
`UNUSED_VAR (byteen)
`UNUSED_VAR (write_data)
assign wdata = fill_data;
assign wren = fill;
end
wire [`CLOG2(NUM_WAYS)-1:0] way_idx;
VX_onehot_encoder #(
.N (NUM_WAYS)
) way_enc (
.data_in (way_sel),
.data_out (way_idx),
`UNUSED_PIN (valid_out)
);
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.read (1'b1),
.write (write || fill),
.wren (wren),
.addr (line_sel),
.wdata (wdata),
.rdata (rdata)
);
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
if (`CS_WORDS_PER_LINE > 1) begin
assign per_way_rdata = rdata[wsel];
end else begin
`UNUSED_VAR (wsel)
assign per_way_rdata = rdata;
end
assign read_data = per_way_rdata[way_idx];
`UNUSED_VAR (stall)
`ifdef DBG_TRACE_CACHE_DATA
always @(posedge clk) begin
if (fill && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d data-fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
end
if (read && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d data-read: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, read_data, req_uuid));
end
if (write && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d data-write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, byteen, write_data, req_uuid));
end
end
`endif
endmodule

View File

@@ -1,72 +1,65 @@
`ifndef VX_CACHE_DEFINE
`define VX_CACHE_DEFINE
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`ifndef VX_CACHE_DEFINE_VH
`define VX_CACHE_DEFINE_VH
// cache request identifier
`define DBG_CACHE_REQ_IDW 44
`include "VX_define.vh"
`define REQS_BITS `LOG2UP(NUM_REQS)
`define CS_REQ_SEL_BITS `CLOG2(NUM_REQS)
`define PORTS_BITS `LOG2UP(NUM_PORTS)
`define CS_WORD_WIDTH (8 * WORD_SIZE)
`define CS_LINE_WIDTH (8 * LINE_SIZE)
`define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS)
`define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS)
// tag valid tid word_sel
`define MSHR_DATA_WIDTH ((CORE_TAG_WIDTH + 1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
`define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS))
`define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE)
`define WORD_WIDTH (8 * WORD_SIZE)
`define CACHE_LINE_WIDTH (8 * CACHE_LINE_SIZE)
`define BANK_SIZE (CACHE_SIZE / NUM_BANKS)
`define LINES_PER_BANK (`BANK_SIZE / CACHE_LINE_SIZE)
`define WORDS_PER_LINE (CACHE_LINE_SIZE / WORD_SIZE)
`define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE))
`define MEM_ADDR_WIDTH (32-`CLOG2(CACHE_LINE_SIZE))
`define LINE_ADDR_WIDTH (`MEM_ADDR_WIDTH-`CLOG2(NUM_BANKS))
`define CS_WORD_ADDR_WIDTH (`MEM_ADDR_WIDTH-`CLOG2(WORD_SIZE))
`define CS_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH-`CLOG2(LINE_SIZE))
`define CS_LINE_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH-`CLOG2(NUM_BANKS))
// Word select
`define WORD_SELECT_BITS `CLOG2(`WORDS_PER_LINE)
`define WORD_SELECT_ADDR_START 0
`define WORD_SELECT_ADDR_END (`WORD_SELECT_ADDR_START+`WORD_SELECT_BITS-1)
`define CS_WORD_SEL_BITS `CLOG2(`CS_WORDS_PER_LINE)
`define CS_WORD_SEL_ADDR_START 0
`define CS_WORD_SEL_ADDR_END (`CS_WORD_SEL_ADDR_START+`CS_WORD_SEL_BITS-1)
// Bank select
`define BANK_SELECT_BITS `CLOG2(NUM_BANKS)
`define BANK_SELECT_ADDR_START (1+`WORD_SELECT_ADDR_END+BANK_ADDR_OFFSET)
`define BANK_SELECT_ADDR_END (`BANK_SELECT_ADDR_START+`BANK_SELECT_BITS-1)
`define CS_BANK_SEL_BITS `CLOG2(NUM_BANKS)
`define CS_BANK_SEL_ADDR_START (1+`CS_WORD_SEL_ADDR_END)
`define CS_BANK_SEL_ADDR_END (`CS_BANK_SEL_ADDR_START+`CS_BANK_SEL_BITS-1)
// Line select
`define LINE_SELECT_BITS `CLOG2(`LINES_PER_BANK)
`define LINE_SELECT_ADDR_START (1+`BANK_SELECT_ADDR_END)
`define LINE_SELECT_ADDR_END (`LINE_SELECT_ADDR_START-BANK_ADDR_OFFSET+`LINE_SELECT_BITS-1)
`define CS_LINE_SEL_BITS `CLOG2(`CS_LINES_PER_BANK)
`define CS_LINE_SEL_ADDR_START (1+`CS_BANK_SEL_ADDR_END)
`define CS_LINE_SEL_ADDR_END (`CS_LINE_SEL_ADDR_START+`CS_LINE_SEL_BITS-1)
// Tag select
`define TAG_SELECT_BITS (`WORD_ADDR_WIDTH-1-`LINE_SELECT_ADDR_END)
`define TAG_SELECT_ADDR_START (1+`LINE_SELECT_ADDR_END)
`define TAG_SELECT_ADDR_END (`WORD_ADDR_WIDTH-1)
`define CS_TAG_SEL_BITS (`CS_WORD_ADDR_WIDTH-1-`CS_LINE_SEL_ADDR_END)
`define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END)
`define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1)
`define SELECT_BANK_ID(x) x[`BANK_SELECT_ADDR_END : `BANK_SELECT_ADDR_START]
`define SELECT_LINE_ADDR0(x) x[`WORD_ADDR_WIDTH-1 : `LINE_SELECT_ADDR_START]
`define SELECT_LINE_ADDRX(x) {x[`WORD_ADDR_WIDTH-1 : `LINE_SELECT_ADDR_START], x[`BANK_SELECT_ADDR_START-1 : 1+`WORD_SELECT_ADDR_END]}
`define LINE_TAG_ADDR(x) x[`LINE_ADDR_WIDTH-1 : `LINE_SELECT_BITS]
`define CACHE_REQ_ID_RNG CORE_TAG_WIDTH-1 : (CORE_TAG_WIDTH-`DBG_CACHE_REQ_IDW)
`define CS_LINE_TAG_ADDR(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
///////////////////////////////////////////////////////////////////////////////
`define CORE_RSP_TAGS ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS)
`define CS_LINE_TO_MEM_ADDR(x, i) {x, `CS_BANK_SEL_BITS'(i)}
`define CS_MEM_ADDR_TO_BANK_ID(x) x[0 +: `CS_BANK_SEL_BITS]
`define CS_MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
`define CS_MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `CS_BANK_SEL_BITS]
`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
`define CS_LINE_TO_FULL_ADDR(x, i) {x, (`XLEN-$bits(x))'(i << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))}
`define CS_MEM_TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)}
`define MEM_ADDR_TO_BANK_ID(x) x[0 +: `BANK_SELECT_BITS]
`define MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
`define MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `BANK_SELECT_BITS]
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
`endif
`endif // VX_CACHE_DEFINE_VH

51
hw/rtl/cache/VX_cache_init.sv vendored Normal file
View File

@@ -0,0 +1,51 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_init #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 1
) (
input wire clk,
input wire reset,
output wire [`CS_LINE_SEL_BITS-1:0] addr_out,
output wire valid_out
);
reg enabled;
reg [`CS_LINE_SEL_BITS-1:0] line_ctr;
always @(posedge clk) begin
if (reset) begin
enabled <= 1;
line_ctr <= '0;
end else begin
if (enabled) begin
if (line_ctr == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
enabled <= 0;
end
line_ctr <= line_ctr + `CS_LINE_SEL_BITS'(1);
end
end
end
assign addr_out = line_ctr;
assign valid_out = enabled;
endmodule

271
hw/rtl/cache/VX_cache_mshr.sv vendored Normal file
View File

@@ -0,0 +1,271 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
// this is an implementation of a pipelined multi-banked cache
// we allocate a free slot from the MSHR before processing a core request
// and release the slot when we get a cache hit.
// during a memory fill response we initiate the replay sequence
// and dequeue all associated pending entries.
// Warning: This MSHR implementation is strongly coupled with the bank pipeline
// and as such changes to either module requires careful evaluation.
// This implementation makes the following assumptions:
// (1) two-cycle pipeline: st0 and st1.
// (2) core request flow: st0: allocate / lookup, st1: finalize.
// (3) the first dequeue after the fill should happen in st0, when the fill is in st1
// this is enforced inside the bank by "rdw_hazard_st0".
module VX_cache_mshr #(
parameter `STRING INSTANCE_ID= "",
parameter BANK_ID = 0,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 1,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 4,
// Request debug identifier
parameter UUID_WIDTH = 0,
// MSHR parameters
parameter DATA_WIDTH = 1,
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE)
) (
input wire clk,
input wire reset,
`IGNORE_UNUSED_BEGIN
input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] lkp_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid,
`IGNORE_UNUSED_END
// allocate
input wire allocate_valid,
input wire [`CS_LINE_ADDR_WIDTH-1:0] allocate_addr,
input wire allocate_rw,
input wire [DATA_WIDTH-1:0] allocate_data,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_tail,
output wire allocate_ready,
// lookup
input wire lookup_valid,
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
output wire [MSHR_SIZE-1:0] lookup_matches,
// memory fill
input wire fill_valid,
input wire [MSHR_ADDR_WIDTH-1:0] fill_id,
output wire [`CS_LINE_ADDR_WIDTH-1:0] fill_addr,
// dequeue
output wire dequeue_valid,
output wire [`CS_LINE_ADDR_WIDTH-1:0] dequeue_addr,
output wire dequeue_rw,
output wire [DATA_WIDTH-1:0] dequeue_data,
output wire [MSHR_ADDR_WIDTH-1:0] dequeue_id,
input wire dequeue_ready,
// finalize
input wire finalize_valid,
input wire finalize_release,
input wire finalize_pending,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_id,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_tail
);
`UNUSED_PARAM (BANK_ID)
reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
reg [MSHR_ADDR_WIDTH-1:0] next_index [MSHR_SIZE-1:0];
reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
reg [MSHR_SIZE-1:0] next_table, next_table_x, next_table_n;
reg [MSHR_SIZE-1:0] write_table;
reg allocate_rdy, allocate_rdy_n;
reg [MSHR_ADDR_WIDTH-1:0] allocate_id_r, allocate_id_n;
reg dequeue_val, dequeue_val_n;
reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n;
wire [MSHR_ADDR_WIDTH-1:0] tail_idx;
wire allocate_fire = allocate_valid && allocate_ready;
wire dequeue_fire = dequeue_valid && dequeue_ready;
wire [MSHR_SIZE-1:0] addr_matches;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr);
end
VX_lzc #(
.N (MSHR_SIZE),
.REVERSE (1)
) allocate_sel (
.data_in (~valid_table_n),
.data_out (allocate_id_n),
.valid_out (allocate_rdy_n)
);
VX_onehot_encoder #(
.N (MSHR_SIZE)
) tail_sel (
.data_in (addr_matches & ~next_table_x),
.data_out (tail_idx),
`UNUSED_PIN (valid_out)
);
always @(*) begin
valid_table_n = valid_table;
next_table_x = next_table;
dequeue_val_n = dequeue_val;
dequeue_id_n = dequeue_id;
if (fill_valid) begin
dequeue_val_n = 1;
dequeue_id_n = fill_id;
end
if (dequeue_fire) begin
valid_table_n[dequeue_id] = 0;
if (next_table[dequeue_id]) begin
dequeue_id_n = next_index[dequeue_id];
end else begin
dequeue_val_n = 0;
end
end
if (finalize_valid) begin
if (finalize_release) begin
valid_table_n[finalize_id] = 0;
end
if (finalize_pending) begin
next_table_x[finalize_tail] = 1;
end
end
next_table_n = next_table_x;
if (allocate_fire) begin
valid_table_n[allocate_id] = 1;
next_table_n[allocate_id] = 0;
end
end
always @(posedge clk) begin
if (reset) begin
valid_table <= '0;
allocate_rdy <= 0;
dequeue_val <= 0;
end else begin
valid_table <= valid_table_n;
allocate_rdy <= allocate_rdy_n;
dequeue_val <= dequeue_val_n;
end
if (allocate_fire) begin
addr_table[allocate_id] <= allocate_addr;
write_table[allocate_id] <= allocate_rw;
end
if (finalize_valid && finalize_pending) begin
next_index[finalize_tail] <= finalize_id;
end
dequeue_id_r <= dequeue_id_n;
allocate_id_r <= allocate_id_n;
next_table <= next_table_n;
end
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s-bank%0d inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s-bank%0d invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s-bank%0d invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, BANK_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
VX_dp_ram #(
.DATAW (DATA_WIDTH),
.SIZE (MSHR_SIZE),
.LUTRAM (1)
) entries (
.clk (clk),
.read (1'b1),
.write (allocate_valid),
`UNUSED_PIN (wren),
.waddr (allocate_id_r),
.wdata (allocate_data),
.raddr (dequeue_id_r),
.rdata (dequeue_data)
);
assign fill_addr = addr_table[fill_id];
assign allocate_ready = allocate_rdy;
assign allocate_id = allocate_id_r;
assign allocate_tail = tail_idx;
assign dequeue_valid = dequeue_val;
assign dequeue_addr = addr_table[dequeue_id_r];
assign dequeue_rw = write_table[dequeue_id_r];
assign dequeue_id = dequeue_id_r;
assign lookup_matches = addr_matches & ~write_table;
`UNUSED_VAR (lookup_valid)
`ifdef DBG_TRACE_CACHE_MSHR
reg show_table;
always @(posedge clk) begin
if (reset) begin
show_table <= 0;
end else begin
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
end
if (allocate_fire)
`TRACE(3, ("%d: %s-bank%0d mshr-allocate: addr=0x%0h, tail=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_tail, allocate_id, lkp_req_uuid));
if (lookup_valid)
`TRACE(3, ("%d: %s-bank%0d mshr-lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_matches, lkp_req_uuid));
if (finalize_valid)
`TRACE(3, ("%d: %s-bank%0d mshr-finalize release=%b, pending=%b, tail=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
finalize_release, finalize_pending, finalize_tail, finalize_id, fin_req_uuid));
if (fill_valid)
`TRACE(3, ("%d: %s-bank%0d mshr-fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, BANK_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id));
if (dequeue_fire)
`TRACE(3, ("%d: %s-bank%0d mshr-dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID,
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid));
if (show_table) begin
`TRACE(3, ("%d: %s-bank%0d mshr-table", $time, INSTANCE_ID, BANK_ID));
for (integer i = 0; i < MSHR_SIZE; ++i) begin
if (valid_table[i]) begin
`TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID)));
if (write_table[i])
`TRACE(3, ("(w)"));
else
`TRACE(3, ("(r)"));
if (next_table[i])
`TRACE(3, ("->%0d", next_index[i]));
end
end
`TRACE(3, ("\n"));
end
end
`endif
endmodule

116
hw/rtl/cache/VX_cache_tags.sv vendored Normal file
View File

@@ -0,0 +1,116 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_tags #(
parameter `STRING INSTANCE_ID = "",
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0
) (
input wire clk,
input wire reset,
`IGNORE_UNUSED_BEGIN
input wire [`UP(UUID_WIDTH)-1:0] req_uuid,
`IGNORE_UNUSED_END
input wire stall,
// read/fill
input wire lookup,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire fill,
input wire init,
output wire [NUM_WAYS-1:0] way_sel,
output wire [NUM_WAYS-1:0] tag_matches
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_VAR (reset)
`UNUSED_VAR (lookup)
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_TAG_ADDR(line_addr);
if (NUM_WAYS > 1) begin
reg [NUM_WAYS-1:0] repl_way;
// cyclic assignment of replacement way
always @(posedge clk) begin
if (reset) begin
repl_way <= 1;
end else if (~stall) begin // hold the value on stalls prevent filling different slots twice
repl_way <= {repl_way[NUM_WAYS-2:0], repl_way[NUM_WAYS-1]};
end
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin
assign way_sel[i] = fill && repl_way[i];
end
end else begin
`UNUSED_VAR (stall)
assign way_sel = fill;
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin
wire [`CS_TAG_SEL_BITS-1:0] read_tag;
wire read_valid;
VX_sp_ram #(
.DATAW (TAG_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.NO_RWCHECK (1)
) tag_store (
.clk (clk),
.read (1'b1),
.write (way_sel[i] || init),
`UNUSED_PIN (wren),
.addr (line_sel),
.wdata ({~init, line_tag}),
.rdata ({read_valid, read_tag})
);
assign tag_matches[i] = read_valid && (line_tag == read_tag);
end
`ifdef DBG_TRACE_CACHE_TAG
always @(posedge clk) begin
if (fill && ~stall) begin
`TRACE(3, ("%d: %s-bank%0d tag-fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag));
end
if (init) begin
`TRACE(3, ("%d: %s-bank%0d tag-init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
end
if (lookup && ~stall) begin
if (tag_matches != 0) begin
`TRACE(3, ("%d: %s-bank%0d tag-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag, req_uuid));
end else begin
`TRACE(3, ("%d: %s-bank%0d tag-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
end
end
end
`endif
endmodule

168
hw/rtl/cache/VX_cache_top.sv vendored Normal file
View File

@@ -0,0 +1,168 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_top #(
parameter `STRING INSTANCE_ID = "",
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 4,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = 16,
// Core response output register
parameter CORE_OUT_REG = 2,
// Memory request output register
parameter MEM_OUT_REG = 2,
parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS)
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
// Core request
input wire [NUM_REQS-1:0] core_req_valid,
input wire [NUM_REQS-1:0] core_req_rw,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_REQS-1:0] core_req_ready,
// Core response
output wire [NUM_REQS-1:0] core_rsp_valid,
output wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag,
input wire [NUM_REQS-1:0] core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready
);
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH)
) core_bus_if[NUM_REQS]();
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH)
) mem_bus_if();
// Core request
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_bus_if[i].req_valid = core_req_valid[i];
assign core_bus_if[i].req_data.rw = core_req_rw[i];
assign core_bus_if[i].req_data.byteen = core_req_byteen[i];
assign core_bus_if[i].req_data.addr = core_req_addr[i];
assign core_bus_if[i].req_data.data = core_req_data[i];
assign core_bus_if[i].req_data.tag = core_req_tag[i];
assign core_req_ready[i] = core_bus_if[i].req_ready;
end
// Core response
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_valid[i] = core_bus_if[i].rsp_valid;
assign core_rsp_data[i] = core_bus_if[i].rsp_data.data;
assign core_rsp_tag[i] = core_bus_if[i].rsp_data.tag;
assign core_bus_if[i].rsp_ready = core_rsp_ready[i];
end
// Memory request
assign mem_req_valid = mem_bus_if.req_valid;
assign mem_req_rw = mem_bus_if.req_data.rw;
assign mem_req_byteen = mem_bus_if.req_data.byteen;
assign mem_req_addr = mem_bus_if.req_data.addr;
assign mem_req_data = mem_bus_if.req_data.data;
assign mem_req_tag = mem_bus_if.req_data.tag;
assign mem_bus_if.req_ready = mem_req_ready;
// Memory response
assign mem_bus_if.rsp_valid = mem_rsp_valid;
assign mem_bus_if.rsp_data.data = mem_rsp_data;
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_bus_if.rsp_ready;
VX_cache #(
.INSTANCE_ID (INSTANCE_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.TAG_WIDTH (TAG_WIDTH),
.UUID_WIDTH (UUID_WIDTH),
.WRITE_ENABLE (WRITE_ENABLE),
.CORE_OUT_REG (CORE_OUT_REG),
.MEM_OUT_REG (MEM_OUT_REG)
) cache (
`ifdef PERF_ENABLE
.cache_perf (cache_perf),
`endif
.clk (clk),
.reset (reset),
.core_bus_if (core_bus_if),
.mem_bus_if (mem_bus_if)
);
endmodule

494
hw/rtl/cache/VX_cache_wrap.sv vendored Normal file
View File

@@ -0,0 +1,494 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_wrap import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 4096,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// enable bypass for non-cacheable addresses
parameter NC_TAG_BIT = 0,
parameter NC_ENABLE = 0,
// Force bypass for all requests
parameter PASSTHRU = 0,
// Core response output register
parameter CORE_OUT_REG = 0,
// Memory request output register
parameter MEM_OUT_REG = 0
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
VX_mem_bus_if.slave core_bus_if [NUM_REQS],
VX_mem_bus_if.master mem_bus_if
);
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid parameter: NUM_BANKS=%d, NUM_REQS=%d", NUM_BANKS, NUM_REQS))
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
localparam CORE_TAG_X_WIDTH = TAG_WIDTH - NC_ENABLE;
localparam MEM_TAG_X_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
localparam MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH)) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
localparam NC_BYPASS = (NC_ENABLE || PASSTHRU);
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
wire [NUM_REQS-1:0] core_req_valid;
wire [NUM_REQS-1:0] core_req_rw;
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
wire [NUM_REQS-1:0] core_req_ready;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_valid[i] = core_bus_if[i].req_valid;
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
assign core_req_data[i] = core_bus_if[i].req_data.data;
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
assign core_bus_if[i].req_ready = core_req_ready[i];
end
///////////////////////////////////////////////////////////////////////////
// Core response buffering
wire [NUM_REQS-1:0] core_rsp_valid_s;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_s;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
wire [NUM_REQS-1:0] core_rsp_ready_s;
for (genvar i = 0; i < NUM_REQS; ++i) begin
`RESET_RELAY (core_rsp_reset, reset);
VX_elastic_buffer #(
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
.SIZE ((NC_BYPASS && !DIRECT_PASSTHRU) ? `OUT_REG_TO_EB_SIZE(CORE_OUT_REG) : 0),
.OUT_REG (`OUT_REG_TO_EB_REG(CORE_OUT_REG))
) core_rsp_buf (
.clk (clk),
.reset (core_rsp_reset),
.valid_in (core_rsp_valid_s[i]),
.ready_in (core_rsp_ready_s[i]),
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
.valid_out (core_bus_if[i].rsp_valid),
.ready_out (core_bus_if[i].rsp_ready)
);
end
///////////////////////////////////////////////////////////////////////////
// Memory request buffering
wire mem_req_valid_s;
wire mem_req_rw_s;
wire [LINE_SIZE-1:0] mem_req_byteen_s;
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
wire mem_req_ready_s;
VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
.SIZE ((NC_BYPASS && !DIRECT_PASSTHRU) ? `OUT_REG_TO_EB_SIZE(MEM_OUT_REG) : 0),
.OUT_REG (`OUT_REG_TO_EB_REG(MEM_OUT_REG))
) mem_req_buf (
.clk (clk),
.reset (reset),
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
.valid_out (mem_bus_if.req_valid),
.ready_out (mem_bus_if.req_ready)
);
///////////////////////////////////////////////////////////////////////////
// Core request
wire [NUM_REQS-1:0] core_req_valid_b;
wire [NUM_REQS-1:0] core_req_rw_b;
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr_b;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_b;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data_b;
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_req_tag_b;
wire [NUM_REQS-1:0] core_req_ready_b;
// Core response
wire [NUM_REQS-1:0] core_rsp_valid_b;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_b;
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_rsp_tag_b;
wire [NUM_REQS-1:0] core_rsp_ready_b;
// Memory request
wire mem_req_valid_b;
wire mem_req_rw_b;
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
wire [LINE_SIZE-1:0] mem_req_byteen_b;
wire [`CS_LINE_WIDTH-1:0] mem_req_data_b;
wire [MEM_TAG_X_WIDTH-1:0] mem_req_tag_b;
wire mem_req_ready_b;
// Memory response
wire mem_rsp_valid_b;
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_b;
wire [MEM_TAG_X_WIDTH-1:0] mem_rsp_tag_b;
wire mem_rsp_ready_b;
if (NC_BYPASS) begin
`RESET_RELAY (nc_bypass_reset, reset);
VX_cache_bypass #(
.NUM_REQS (NUM_REQS),
.NC_TAG_BIT (NC_TAG_BIT),
.NC_ENABLE (NC_ENABLE),
.PASSTHRU (PASSTHRU),
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
.CORE_DATA_SIZE (WORD_SIZE),
.CORE_TAG_IN_WIDTH (TAG_WIDTH),
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
.MEM_DATA_SIZE (LINE_SIZE),
.MEM_TAG_IN_WIDTH (MEM_TAG_X_WIDTH),
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
.UUID_WIDTH (UUID_WIDTH)
) cache_bypass (
.clk (clk),
.reset (nc_bypass_reset),
// Core request in
.core_req_valid_in (core_req_valid),
.core_req_rw_in (core_req_rw),
.core_req_byteen_in (core_req_byteen),
.core_req_addr_in (core_req_addr),
.core_req_data_in (core_req_data),
.core_req_tag_in (core_req_tag),
.core_req_ready_in (core_req_ready),
// Core request out
.core_req_valid_out (core_req_valid_b),
.core_req_rw_out (core_req_rw_b),
.core_req_byteen_out(core_req_byteen_b),
.core_req_addr_out (core_req_addr_b),
.core_req_data_out (core_req_data_b),
.core_req_tag_out (core_req_tag_b),
.core_req_ready_out (core_req_ready_b),
// Core response in
.core_rsp_valid_in (core_rsp_valid_b),
.core_rsp_data_in (core_rsp_data_b),
.core_rsp_tag_in (core_rsp_tag_b),
.core_rsp_ready_in (core_rsp_ready_b),
// Core response out
.core_rsp_valid_out (core_rsp_valid_s),
.core_rsp_data_out (core_rsp_data_s),
.core_rsp_tag_out (core_rsp_tag_s),
.core_rsp_ready_out (core_rsp_ready_s),
// Memory request in
.mem_req_valid_in (mem_req_valid_b),
.mem_req_rw_in (mem_req_rw_b),
.mem_req_addr_in (mem_req_addr_b),
.mem_req_byteen_in (mem_req_byteen_b),
.mem_req_data_in (mem_req_data_b),
.mem_req_tag_in (mem_req_tag_b),
.mem_req_ready_in (mem_req_ready_b),
// Memory request out
.mem_req_valid_out (mem_req_valid_s),
.mem_req_addr_out (mem_req_addr_s),
.mem_req_rw_out (mem_req_rw_s),
.mem_req_byteen_out (mem_req_byteen_s),
.mem_req_data_out (mem_req_data_s),
.mem_req_tag_out (mem_req_tag_s),
.mem_req_ready_out (mem_req_ready_s),
// Memory response in
.mem_rsp_valid_in (mem_bus_if.rsp_valid),
.mem_rsp_data_in (mem_bus_if.rsp_data.data),
.mem_rsp_tag_in (mem_bus_if.rsp_data.tag),
.mem_rsp_ready_in (mem_bus_if.rsp_ready),
// Memory response out
.mem_rsp_valid_out (mem_rsp_valid_b),
.mem_rsp_data_out (mem_rsp_data_b),
.mem_rsp_tag_out (mem_rsp_tag_b),
.mem_rsp_ready_out (mem_rsp_ready_b)
);
end else begin
assign core_req_valid_b = core_req_valid;
assign core_req_rw_b = core_req_rw;
assign core_req_addr_b = core_req_addr;
assign core_req_byteen_b= core_req_byteen;
assign core_req_data_b = core_req_data;
assign core_req_tag_b = core_req_tag;
assign core_req_ready = core_req_ready_b;
assign core_rsp_valid_s = core_rsp_valid_b;
assign core_rsp_data_s = core_rsp_data_b;
assign core_rsp_tag_s = core_rsp_tag_b;
assign core_rsp_ready_b = core_rsp_ready_s;
assign mem_req_valid_s = mem_req_valid_b;
assign mem_req_addr_s = mem_req_addr_b;
assign mem_req_rw_s = mem_req_rw_b;
assign mem_req_byteen_s = mem_req_byteen_b;
assign mem_req_data_s = mem_req_data_b;
assign mem_req_ready_b = mem_req_ready_s;
// Add explicit NC=0 flag to the memory request tag
VX_bits_insert #(
.N (MEM_TAG_WIDTH-1),
.POS (NC_TAG_BIT)
) mem_req_tag_insert (
.data_in (mem_req_tag_b),
.sel_in (1'b0),
.data_out (mem_req_tag_s)
);
assign mem_rsp_valid_b = mem_bus_if.rsp_valid;
assign mem_rsp_data_b = mem_bus_if.rsp_data.data;
assign mem_bus_if.rsp_ready = mem_rsp_ready_b;
// Remove NC flag from the memory response tag
VX_bits_remove #(
.N (MEM_TAG_WIDTH),
.POS (NC_TAG_BIT)
) mem_rsp_tag_remove (
.data_in (mem_bus_if.rsp_data.tag),
.data_out (mem_rsp_tag_b)
);
end
if (PASSTHRU != 0) begin
`UNUSED_VAR (core_req_valid_b)
`UNUSED_VAR (core_req_rw_b)
`UNUSED_VAR (core_req_addr_b)
`UNUSED_VAR (core_req_byteen_b)
`UNUSED_VAR (core_req_data_b)
`UNUSED_VAR (core_req_tag_b)
assign core_req_ready_b = '0;
assign core_rsp_valid_b = '0;
assign core_rsp_data_b = '0;
assign core_rsp_tag_b = '0;
`UNUSED_VAR (core_rsp_ready_b)
assign mem_req_valid_b = 0;
assign mem_req_addr_b = '0;
assign mem_req_rw_b = '0;
assign mem_req_byteen_b = '0;
assign mem_req_data_b = '0;
assign mem_req_tag_b = '0;
`UNUSED_VAR (mem_req_ready_b)
`UNUSED_VAR (mem_rsp_valid_b)
`UNUSED_VAR (mem_rsp_data_b)
`UNUSED_VAR (mem_rsp_tag_b)
assign mem_rsp_ready_b = 0;
`ifdef PERF_ENABLE
assign cache_perf = '0;
`endif
end else begin
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (CORE_TAG_X_WIDTH)
) core_bus_wrap_if[NUM_REQS]();
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_X_WIDTH)
) mem_bus_wrap_if();
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_bus_wrap_if[i].req_valid = core_req_valid_b[i];
assign core_bus_wrap_if[i].req_data.rw = core_req_rw_b[i];
assign core_bus_wrap_if[i].req_data.addr = core_req_addr_b[i];
assign core_bus_wrap_if[i].req_data.byteen = core_req_byteen_b[i];
assign core_bus_wrap_if[i].req_data.data = core_req_data_b[i];
assign core_bus_wrap_if[i].req_data.tag = core_req_tag_b[i];
assign core_req_ready_b[i] = core_bus_wrap_if[i].req_ready;
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_valid_b[i] = core_bus_wrap_if[i].rsp_valid;
assign core_rsp_data_b[i] = core_bus_wrap_if[i].rsp_data.data;
assign core_rsp_tag_b[i] = core_bus_wrap_if[i].rsp_data.tag;
assign core_bus_wrap_if[i].rsp_ready = core_rsp_ready_b[i];
end
assign mem_req_valid_b = mem_bus_wrap_if.req_valid;
assign mem_req_addr_b = mem_bus_wrap_if.req_data.addr;
assign mem_req_rw_b = mem_bus_wrap_if.req_data.rw;
assign mem_req_byteen_b = mem_bus_wrap_if.req_data.byteen;
assign mem_req_data_b = mem_bus_wrap_if.req_data.data;
assign mem_req_tag_b = mem_bus_wrap_if.req_data.tag;
assign mem_bus_wrap_if.req_ready = mem_req_ready_b;
assign mem_bus_wrap_if.rsp_valid = mem_rsp_valid_b;
assign mem_bus_wrap_if.rsp_data.data = mem_rsp_data_b;
assign mem_bus_wrap_if.rsp_data.tag = mem_rsp_tag_b;
assign mem_rsp_ready_b = mem_bus_wrap_if.rsp_ready;
`RESET_RELAY (cache_reset, reset);
VX_cache #(
.INSTANCE_ID (INSTANCE_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (CORE_TAG_X_WIDTH),
.CORE_OUT_REG (NC_BYPASS ? 1 : CORE_OUT_REG),
.MEM_OUT_REG (NC_BYPASS ? 1 : MEM_OUT_REG)
) cache (
.clk (clk),
.reset (cache_reset),
`ifdef PERF_ENABLE
.cache_perf (cache_perf),
`endif
.core_bus_if (core_bus_wrap_if),
.mem_bus_if (mem_bus_wrap_if)
);
end
`ifdef DBG_TRACE_CACHE_BANK
for (genvar i = 0; i < NUM_REQS; ++i) begin
wire [`UP(UUID_WIDTH)-1:0] core_req_uuid;
wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid;
if (UUID_WIDTH != 0) begin
assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign core_req_uuid = 0;
assign core_rsp_uuid = 0;
end
wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready;
wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready;
always @(posedge clk) begin
if (core_req_fire) begin
if (core_bus_if[i].req_data.rw)
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
else
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
end
if (core_rsp_fire) begin
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
end
end
end
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
if ((UUID_WIDTH != 0) && (NC_BYPASS != 0)) begin
assign mem_req_uuid = mem_bus_if.req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
assign mem_rsp_uuid = mem_bus_if.rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign mem_req_uuid = 0;
assign mem_rsp_uuid = 0;
end
wire mem_req_fire = mem_bus_if.req_valid && mem_bus_if.req_ready;
wire mem_rsp_fire = mem_bus_if.rsp_valid && mem_bus_if.rsp_ready;
always @(posedge clk) begin
if (mem_req_fire) begin
if (mem_bus_if.req_data.rw)
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
else
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
end
if (mem_rsp_fire) begin
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
end
end
`endif
endmodule

View File

@@ -1,314 +0,0 @@
`include "VX_cache_define.vh"
module VX_core_req_bank_sel #(
parameter CACHE_ID = 0,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 64,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of banks
parameter NUM_BANKS = 4,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// core request tag size
parameter CORE_TAG_WIDTH = 3,
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output wire [`PERF_CTR_BITS-1:0] bank_stalls,
`endif
input wire [NUM_REQS-1:0] core_req_valid,
input wire [NUM_REQS-1:0] core_req_rw,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_REQS-1:0] core_req_ready,
output wire [NUM_BANKS-1:0] per_bank_core_req_valid,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_pmask,
output wire [NUM_BANKS-1:0] per_bank_core_req_rw,
output wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
input wire [NUM_BANKS-1:0] per_bank_core_req_ready
);
`UNUSED_PARAM (CACHE_ID)
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
`STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value"))
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
wire [NUM_REQS-1:0][`LINE_ADDR_WIDTH-1:0] core_req_line_addr;
wire [NUM_REQS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel;
wire [NUM_REQS-1:0][`UP(`BANK_SELECT_BITS)-1:0] core_req_bid;
for (genvar i = 0; i < NUM_REQS; i++) begin
if (BANK_ADDR_OFFSET == 0) begin
assign core_req_line_addr[i] = `SELECT_LINE_ADDR0(core_req_addr[i]);
end else begin
assign core_req_line_addr[i] = `SELECT_LINE_ADDRX(core_req_addr[i]);
end
assign core_req_wsel[i] = core_req_addr[i][`UP(`WORD_SELECT_BITS)-1:0];
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
if (NUM_BANKS > 1) begin
assign core_req_bid[i] = `SELECT_BANK_ID(core_req_addr[i]);
end else begin
assign core_req_bid[i] = 0;
end
end
reg [NUM_BANKS-1:0] per_bank_core_req_valid_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_pmask_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
reg [NUM_BANKS-1:0] per_bank_core_req_rw_r;
reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r;
reg [NUM_REQS-1:0] core_req_ready_r;
if (NUM_REQS > 1) begin
if (NUM_PORTS > 1) begin
reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_line_addr_r;
reg [NUM_BANKS-1:0] per_bank_rw_r;
wire [NUM_REQS-1:0] core_req_line_match;
always @(*) begin
per_bank_line_addr_r = 'x;
per_bank_rw_r = 'x;
for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
per_bank_line_addr_r[core_req_bid[i]] = core_req_line_addr[i];
per_bank_rw_r[core_req_bid[i]] = core_req_rw[i];
end
end
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_line_match[i] = (core_req_line_addr[i] == per_bank_line_addr_r[core_req_bid[i]])
&& (core_req_rw[i] == per_bank_rw_r[core_req_bid[i]]);
end
if (NUM_PORTS < NUM_REQS) begin
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][NUM_REQS-1:0] req_select_table_r;
always @(*) begin
per_bank_core_req_valid_r = 0;
per_bank_core_req_pmask_r = 0;
per_bank_core_req_rw_r = 'x;
per_bank_core_req_addr_r = 'x;
per_bank_core_req_wsel_r = 'x;
per_bank_core_req_byteen_r= 'x;
per_bank_core_req_data_r = 'x;
per_bank_core_req_tag_r = 'x;
per_bank_core_req_tid_r = 'x;
req_select_table_r = 'x;
for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
per_bank_core_req_valid_r[core_req_bid[i]] = 1;
per_bank_core_req_pmask_r[core_req_bid[i]][i % NUM_PORTS] = core_req_line_match[i];
per_bank_core_req_wsel_r[core_req_bid[i]][i % NUM_PORTS] = core_req_wsel[i];
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
req_select_table_r[core_req_bid[i]][i % NUM_PORTS] = (1 << i);
end
end
end
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
&& core_req_line_match[i]
&& req_select_table_r[core_req_bid[i]][i % NUM_PORTS][i];
end
end
end else begin
always @(*) begin
per_bank_core_req_valid_r = 0;
per_bank_core_req_pmask_r = 0;
per_bank_core_req_rw_r = 'x;
per_bank_core_req_addr_r = 'x;
per_bank_core_req_wsel_r = 'x;
per_bank_core_req_byteen_r= 'x;
per_bank_core_req_data_r = 'x;
per_bank_core_req_tag_r = 'x;
per_bank_core_req_tid_r = 'x;
for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
per_bank_core_req_valid_r[core_req_bid[i]] = 1;
per_bank_core_req_pmask_r[core_req_bid[i]][i % NUM_PORTS] = core_req_line_match[i];
per_bank_core_req_wsel_r[core_req_bid[i]][i % NUM_PORTS] = core_req_wsel[i];
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
end
end
end
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
&& core_req_line_match[i];
end
end
end
end else begin
always @(*) begin
per_bank_core_req_valid_r = 0;
per_bank_core_req_rw_r = 'x;
per_bank_core_req_addr_r = 'x;
per_bank_core_req_wsel_r = 'x;
per_bank_core_req_byteen_r= 'x;
per_bank_core_req_data_r = 'x;
per_bank_core_req_tag_r = 'x;
per_bank_core_req_tid_r = 'x;
for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
per_bank_core_req_valid_r[core_req_bid[i]] = 1;
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
per_bank_core_req_wsel_r[core_req_bid[i]] = core_req_wsel[i];
per_bank_core_req_byteen_r[core_req_bid[i]]= core_req_byteen[i];
per_bank_core_req_data_r[core_req_bid[i]] = core_req_data[i];
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
per_bank_core_req_tid_r[core_req_bid[i]] = `REQS_BITS'(i);
end
end
per_bank_core_req_pmask_r = per_bank_core_req_valid_r;
end
if (NUM_BANKS > 1) begin
always @(*) begin
core_req_ready_r = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_r[i]) begin
core_req_ready_r[per_bank_core_req_tid_r[i]] = per_bank_core_req_ready[i];
end
end
end
end else begin
always @(*) begin
core_req_ready_r = 0;
core_req_ready_r[per_bank_core_req_tid_r[0]] = per_bank_core_req_ready;
end
end
end
end else begin
if (NUM_BANKS > 1) begin
always @(*) begin
per_bank_core_req_valid_r = 0;
per_bank_core_req_rw_r = 'x;
per_bank_core_req_addr_r = 'x;
per_bank_core_req_wsel_r = 'x;
per_bank_core_req_byteen_r= 'x;
per_bank_core_req_data_r = 'x;
per_bank_core_req_tag_r = 'x;
per_bank_core_req_tid_r = 'x;
per_bank_core_req_valid_r[core_req_bid[0]] = core_req_valid;
per_bank_core_req_rw_r[core_req_bid[0]] = core_req_rw;
per_bank_core_req_addr_r[core_req_bid[0]] = core_req_line_addr;
per_bank_core_req_wsel_r[core_req_bid[0]] = core_req_wsel;
per_bank_core_req_byteen_r[core_req_bid[0]] = core_req_byteen;
per_bank_core_req_data_r[core_req_bid[0]] = core_req_data;
per_bank_core_req_tag_r[core_req_bid[0]] = core_req_tag;
per_bank_core_req_tid_r[core_req_bid[0]] = 0;
core_req_ready_r = per_bank_core_req_ready[core_req_bid[0]];
per_bank_core_req_pmask_r = per_bank_core_req_valid_r;
end
end else begin
`UNUSED_VAR (core_req_bid)
always @(*) begin
per_bank_core_req_valid_r = core_req_valid;
per_bank_core_req_rw_r = core_req_rw;
per_bank_core_req_addr_r = core_req_line_addr;
per_bank_core_req_wsel_r = core_req_wsel;
per_bank_core_req_byteen_r = core_req_byteen;
per_bank_core_req_data_r = core_req_data;
per_bank_core_req_tag_r = core_req_tag;
per_bank_core_req_tid_r = 0;
core_req_ready_r = per_bank_core_req_ready;
per_bank_core_req_pmask_r = per_bank_core_req_valid_r;
end
end
end
assign per_bank_core_req_valid = per_bank_core_req_valid_r;
assign per_bank_core_req_pmask = per_bank_core_req_pmask_r;
assign per_bank_core_req_rw = per_bank_core_req_rw_r;
assign per_bank_core_req_addr = per_bank_core_req_addr_r;
assign per_bank_core_req_wsel = per_bank_core_req_wsel_r;
assign per_bank_core_req_byteen = per_bank_core_req_byteen_r;
assign per_bank_core_req_data = per_bank_core_req_data_r;
assign per_bank_core_req_tag = per_bank_core_req_tag_r;
assign per_bank_core_req_tid = per_bank_core_req_tid_r;
assign core_req_ready = core_req_ready_r;
`ifdef PERF_ENABLE
reg [NUM_REQS-1:0] core_req_sel_r;
always @(*) begin
core_req_sel_r = 0;
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid[i]) begin
core_req_sel_r[i] = per_bank_core_req_ready[core_req_bid[i]];
end
end
end
reg [`PERF_CTR_BITS-1:0] bank_stalls_r;
wire [$clog2(NUM_REQS+1)-1:0] bank_stall_cnt;
wire [NUM_REQS-1:0] bank_stall_mask = core_req_sel_r & ~core_req_ready;
`POP_COUNT(bank_stall_cnt, bank_stall_mask);
always @(posedge clk) begin
if (reset) begin
bank_stalls_r <= 0;
end else begin
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'(bank_stall_cnt);
end
end
assign bank_stalls = bank_stalls_r;
`endif
endmodule

View File

@@ -1,350 +0,0 @@
`include "VX_cache_define.vh"
module VX_core_rsp_merge #(
parameter CACHE_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// output register
parameter OUT_REG = 0
) (
input wire clk,
input wire reset,
// Per Bank WB
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
// Core Response
output wire [`CORE_RSP_TAGS-1:0] core_rsp_valid,
output wire [NUM_REQS-1:0] core_rsp_tmask,
output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
input wire [`CORE_RSP_TAGS-1:0] core_rsp_ready
);
`UNUSED_PARAM (CACHE_ID)
if (NUM_BANKS > 1) begin
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [NUM_BANKS-1:0] per_bank_core_rsp_ready_r;
if (CORE_TAG_ID_BITS != 0) begin
// The core response bus handles a single tag at the time
// We first need to select the current tag to process,
// then send all bank responses for that tag as a batch
wire [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
wire core_rsp_ready_unqual;
if (NUM_PORTS > 1) begin
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
end
always @(posedge clk) begin
if (reset) begin
per_bank_core_rsp_sent_r <= '0;
end else begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
per_bank_core_rsp_sent_r[i] <= '0;
end else begin
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
end
end
end
end
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_valid_p;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
for (genvar p = 0; p < NUM_PORTS; ++p) begin
assign per_bank_core_rsp_valid_p[i][p] = per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& !per_bank_core_rsp_sent_r[i][p];
end
end
VX_find_first #(
.N (NUM_BANKS * NUM_PORTS),
.DATAW (CORE_TAG_WIDTH)
) find_first (
.valid_i (per_bank_core_rsp_valid_p),
.data_i (per_bank_core_rsp_tag),
.data_o (core_rsp_tag_unqual),
`UNUSED_PIN (valid_o)
);
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
per_bank_core_rsp_sent = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
for (integer p = 0; p < NUM_PORTS; ++p) begin
if (per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& !per_bank_core_rsp_sent_r[i][p]
&& (per_bank_core_rsp_tag[i][p][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
per_bank_core_rsp_sent[i][p] = core_rsp_ready_unqual;
end
end
end
end
always @(*) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
end
end
end else begin
`UNUSED_VAR (per_bank_core_rsp_pmask)
VX_find_first #(
.N (NUM_BANKS),
.DATAW (CORE_TAG_WIDTH)
) find_first (
.valid_i (per_bank_core_rsp_valid),
.data_i (per_bank_core_rsp_tag),
.data_o (core_rsp_tag_unqual),
`UNUSED_PIN (valid_o)
);
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
per_bank_core_rsp_ready_r = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][0][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual;
end
end
end
end
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
.PASSTHRU (0 == OUT_REG)
) out_sbuf (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_any),
.data_in ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}),
.ready_in (core_rsp_ready_unqual),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tmask, core_rsp_tag, core_rsp_data}),
.ready_out (core_rsp_ready)
);
end else begin
reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
wire [NUM_REQS-1:0] core_rsp_ready_unqual;
if (NUM_PORTS > 1) begin
reg [NUM_REQS-1:0][(`PORTS_BITS + `BANK_SELECT_BITS)-1:0] bank_select_table;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
end
always @(posedge clk) begin
if (reset) begin
per_bank_core_rsp_sent_r <= '0;
end else begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
per_bank_core_rsp_sent_r[i] <= '0;
end else begin
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
end
end
end
end
always @(*) begin
core_rsp_valid_unqual = '0;
core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
bank_select_table = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
for (integer p = 0; p < NUM_PORTS; ++p) begin
if (per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& !per_bank_core_rsp_sent_r[i][p]) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_tag[i][p];
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
bank_select_table[per_bank_core_rsp_tid[i][p]] = {`PORTS_BITS'(p), `BANK_SELECT_BITS'(i)};
end
end
end
end
always @(*) begin
per_bank_core_rsp_sent = '0;
for (integer i = 0; i < NUM_REQS; i++) begin
if (core_rsp_valid_unqual[i]) begin
per_bank_core_rsp_sent[bank_select_table[i][0 +: `BANK_SELECT_BITS]][bank_select_table[i][`BANK_SELECT_BITS +: `PORTS_BITS]] = core_rsp_ready_unqual[i];
end
end
end
always @(*) begin
for (integer i = 0; i < NUM_BANKS; i++) begin
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
end
end
end else begin
`UNUSED_VAR (per_bank_core_rsp_pmask)
reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
bank_select_table = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_rsp_valid[i]) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
end
end
end
always @(*) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]]
&& bank_select_table[per_bank_core_rsp_tid[i]][i];
end
end
end
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
.PASSTHRU (0 == OUT_REG)
) out_sbuf (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_unqual[i]),
.data_in ({core_rsp_tag_unqual[i], core_rsp_data_unqual[i]}),
.ready_in (core_rsp_ready_unqual[i]),
.valid_out (core_rsp_valid[i]),
.data_out ({core_rsp_tag[i],core_rsp_data[i]}),
.ready_out (core_rsp_ready[i])
);
end
assign core_rsp_tmask = core_rsp_valid;
end
assign per_bank_core_rsp_ready = per_bank_core_rsp_ready_r;
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (per_bank_core_rsp_pmask)
if (NUM_REQS > 1) begin
reg [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
if (CORE_TAG_ID_BITS != 0) begin
reg [NUM_REQS-1:0] core_rsp_tmask_unqual;
always @(*) begin
core_rsp_tmask_unqual = 0;
core_rsp_tmask_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
core_rsp_tag_unqual = per_bank_core_rsp_tag;
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
end
assign core_rsp_valid = per_bank_core_rsp_valid;
assign core_rsp_tmask = core_rsp_tmask_unqual;
assign per_bank_core_rsp_ready = core_rsp_ready;
end else begin
reg [`CORE_RSP_TAGS-1:0] core_rsp_valid_unqual;
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
core_rsp_tag_unqual = 'x;
core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag;
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
end
assign core_rsp_valid = core_rsp_valid_unqual;
assign core_rsp_tmask = core_rsp_valid_unqual;
assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid];
end
assign core_rsp_tag = core_rsp_tag_unqual;
assign core_rsp_data = core_rsp_data_unqual;
end else begin
`UNUSED_VAR(per_bank_core_rsp_tid)
assign core_rsp_valid = per_bank_core_rsp_valid;
assign core_rsp_tmask = per_bank_core_rsp_valid;
assign core_rsp_tag = per_bank_core_rsp_tag;
assign core_rsp_data = per_bank_core_rsp_data;
assign per_bank_core_rsp_ready = core_rsp_ready;
end
end
endmodule

View File

@@ -1,133 +0,0 @@
`include "VX_cache_define.vh"
module VX_data_access #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
input wire clk,
input wire reset,
`IGNORE_UNUSED_BEGIN
input wire[`DBG_CACHE_REQ_IDW-1:0] req_id,
`IGNORE_UNUSED_END
input wire stall,
input wire read,
input wire fill,
input wire write,
input wire[`LINE_ADDR_WIDTH-1:0] addr,
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel,
input wire [NUM_PORTS-1:0] pmask,
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen,
input wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] fill_data,
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] write_data,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] read_data
);
`UNUSED_PARAM (CACHE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (reset)
`UNUSED_VAR (addr)
`UNUSED_VAR (read)
localparam BYTEENW = WRITE_ENABLE ? CACHE_LINE_SIZE : 1;
wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] rdata;
wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] wdata;
wire [BYTEENW-1:0] wren;
wire [`LINE_SELECT_BITS-1:0] line_addr = addr[`LINE_SELECT_BITS-1:0];
if (WRITE_ENABLE) begin
if (`WORDS_PER_LINE > 1) begin
reg [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] wdata_r;
reg [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
if (NUM_PORTS > 1) begin
always @(*) begin
wdata_r = 'x;
wren_r = 0;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if (pmask[i]) begin
wdata_r[wsel[i]] = write_data[i];
wren_r[wsel[i]] = byteen[i];
end
end
end
end else begin
`UNUSED_VAR (pmask)
always @(*) begin
wdata_r = {`WORDS_PER_LINE{write_data}};
wren_r = 0;
wren_r[wsel] = byteen;
end
end
assign wdata = write ? wdata_r : fill_data;
assign wren = write ? wren_r : {BYTEENW{fill}};
end else begin
`UNUSED_VAR (wsel)
`UNUSED_VAR (pmask)
assign wdata = write ? write_data : fill_data;
assign wren = write ? byteen : {BYTEENW{fill}};
end
end else begin
`UNUSED_VAR (write)
`UNUSED_VAR (byteen)
`UNUSED_VAR (pmask)
`UNUSED_VAR (write_data)
assign wdata = fill_data;
assign wren = fill;
end
VX_sp_ram #(
.DATAW (`CACHE_LINE_WIDTH),
.SIZE (`LINES_PER_BANK),
.BYTEENW (BYTEENW),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.addr (line_addr),
.wren (wren),
.wdata (wdata),
.rdata (rdata)
);
if (`WORDS_PER_LINE > 1) begin
for (genvar i = 0; i < NUM_PORTS; ++i) begin
assign read_data[i] = rdata[wsel[i]];
end
end else begin
assign read_data = rdata;
end
`UNUSED_VAR (stall)
`ifdef DBG_TRACE_CACHE_DATA
always @(posedge clk) begin
if (fill && ~stall) begin
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data);
end
if (read && ~stall) begin
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, blk_addr=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, read_data, req_id);
end
if (write && ~stall) begin
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, byteen=%b, blk_addr=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), byteen, line_addr, write_data, req_id);
end
end
`endif
endmodule

View File

@@ -1,36 +0,0 @@
`include "VX_cache_define.vh"
module VX_flush_ctrl #(
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1
) (
input wire clk,
input wire reset,
output wire [`LINE_SELECT_BITS-1:0] addr_out,
output wire valid_out
);
reg flush_enable;
reg [`LINE_SELECT_BITS-1:0] flush_ctr;
always @(posedge clk) begin
if (reset) begin
flush_enable <= 1;
flush_ctr <= 0;
end else begin
if (flush_enable) begin
if (flush_ctr == ((2 ** `LINE_SELECT_BITS)-1)) begin
flush_enable <= 0;
end
flush_ctr <= flush_ctr + 1;
end
end
end
assign addr_out = flush_ctr;
assign valid_out = flush_enable;
endmodule

View File

@@ -1,234 +0,0 @@
`include "VX_cache_define.vh"
module VX_miss_resrv #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE)
) (
input wire clk,
input wire reset,
`IGNORE_UNUSED_BEGIN
input wire[`DBG_CACHE_REQ_IDW-1:0] deq_req_id,
input wire[`DBG_CACHE_REQ_IDW-1:0] lkp_req_id,
input wire[`DBG_CACHE_REQ_IDW-1:0] rel_req_id,
`IGNORE_UNUSED_END
// allocate
input wire allocate_valid,
input wire [`LINE_ADDR_WIDTH-1:0] allocate_addr,
input wire [`MSHR_DATA_WIDTH-1:0] allocate_data,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
output wire allocate_ready,
// fill
input wire fill_valid,
input wire [MSHR_ADDR_WIDTH-1:0] fill_id,
output wire [`LINE_ADDR_WIDTH-1:0] fill_addr,
// lookup
input wire lookup_valid,
input wire lookup_replay,
input wire [MSHR_ADDR_WIDTH-1:0] lookup_id,
input wire [`LINE_ADDR_WIDTH-1:0] lookup_addr,
output wire lookup_match,
// dequeue
output wire dequeue_valid,
output wire [MSHR_ADDR_WIDTH-1:0] dequeue_id,
output wire [`LINE_ADDR_WIDTH-1:0] dequeue_addr,
output wire [`MSHR_DATA_WIDTH-1:0] dequeue_data,
input wire dequeue_ready,
// release
input wire release_valid,
input wire [MSHR_ADDR_WIDTH-1:0] release_id
);
`UNUSED_PARAM (CACHE_ID)
`UNUSED_PARAM (BANK_ID)
reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table, addr_table_n;
reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
reg [MSHR_SIZE-1:0] ready_table, ready_table_n;
reg allocate_rdy_r, allocate_rdy_n;
reg [MSHR_ADDR_WIDTH-1:0] allocate_id_r, allocate_id_n;
reg dequeue_val_r, dequeue_val_n, dequeue_val_x;
reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n, dequeue_id_x;
reg [MSHR_SIZE-1:0] valid_table_x;
reg [MSHR_SIZE-1:0] ready_table_x;
wire [MSHR_SIZE-1:0] addr_matches;
wire allocate_fire = allocate_valid && allocate_ready;
wire dequeue_fire = dequeue_valid && dequeue_ready;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
assign addr_matches[i] = (addr_table[i] == lookup_addr);
end
always @(*) begin
valid_table_x = valid_table;
ready_table_x = ready_table;
if (dequeue_fire) begin
valid_table_x[dequeue_id] = 0;
end
if (lookup_replay) begin
ready_table_x |= addr_matches;
end
end
VX_lzc #(
.N (MSHR_SIZE)
) dequeue_sel (
.in_i (valid_table_x & ready_table_x),
.cnt_o (dequeue_id_x),
.valid_o (dequeue_val_x)
);
VX_lzc #(
.N (MSHR_SIZE)
) allocate_sel (
.in_i (~valid_table_n),
.cnt_o (allocate_id_n),
.valid_o (allocate_rdy_n)
);
always @(*) begin
valid_table_n = valid_table_x;
ready_table_n = ready_table_x;
addr_table_n = addr_table;
dequeue_val_n = dequeue_val_r;
dequeue_id_n = dequeue_id_r;
if (dequeue_fire) begin
dequeue_val_n = dequeue_val_x;
dequeue_id_n = dequeue_id_x;
end
if (allocate_fire) begin
valid_table_n[allocate_id] = 1;
ready_table_n[allocate_id] = 0;
addr_table_n[allocate_id] = allocate_addr;
end
if (fill_valid) begin
dequeue_val_n = 1;
dequeue_id_n = fill_id;
end
if (release_valid) begin
valid_table_n[release_id] = 0;
end
end
always @(posedge clk) begin
if (reset) begin
valid_table <= 0;
allocate_rdy_r <= 0;
dequeue_val_r <= 0;
end else begin
valid_table <= valid_table_n;
allocate_rdy_r <= allocate_rdy_n;
dequeue_val_r <= dequeue_val_n;
end
ready_table <= ready_table_n;
addr_table <= addr_table_n;
dequeue_id_r <= dequeue_id_n;
allocate_id_r <= allocate_id_n;
`ASSERT(!allocate_fire || !valid_table[allocate_id_r], ("runtime error"));
`ASSERT(!release_valid || valid_table[release_id], ("runtime error"));
end
`RUNTIME_ASSERT((!allocate_fire || ~valid_table[allocate_id]), ("%t: *** cache%0d:%0d in-use allocation: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id))
`RUNTIME_ASSERT((!fill_valid || valid_table[fill_id]), ("%t: *** cache%0d:%0d invalid fill: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id))
VX_dp_ram #(
.DATAW (`MSHR_DATA_WIDTH),
.SIZE (MSHR_SIZE),
.LUTRAM (1)
) entries (
.clk (clk),
.waddr (allocate_id_r),
.raddr (dequeue_id_r),
.wren (allocate_valid),
.wdata (allocate_data),
.rdata (dequeue_data)
);
assign fill_addr = addr_table[fill_id];
assign allocate_ready = allocate_rdy_r;
assign allocate_id = allocate_id_r;
assign dequeue_valid = dequeue_val_r;
assign dequeue_id = dequeue_id_r;
assign dequeue_addr = addr_table[dequeue_id_r];
wire [MSHR_SIZE-1:0] lookup_entries;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
assign lookup_entries[i] = (i != lookup_id);
end
assign lookup_match = |(lookup_entries & valid_table & addr_matches);
`UNUSED_VAR (lookup_valid)
`ifdef DBG_TRACE_CACHE_MSHR
always @(posedge clk) begin
if (allocate_fire || fill_valid || dequeue_fire || lookup_replay || lookup_valid || release_valid) begin
if (allocate_fire)
dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_req_id);
if (fill_valid)
dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d, addr=%0h\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id, `LINE_TO_BYTE_ADDR(fill_addr, BANK_ID));
if (dequeue_fire)
dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_id);
if (lookup_replay)
dpi_trace("%d: cache%0d:%0d mshr-replay: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lkp_req_id);
if (lookup_valid)
dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b (#%0d)\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lookup_match, lkp_req_id);
if (release_valid)
dpi_trace("%d: cache%0d:%0d mshr-release id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, release_id, rel_req_id);
dpi_trace("%d: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID);
for (integer i = 0; i < MSHR_SIZE; ++i) begin
if (valid_table[i]) begin
dpi_trace(" ");
if (ready_table[i])
dpi_trace("*");
dpi_trace("%0d=%0h", i, `LINE_TO_BYTE_ADDR(addr_table[i], BANK_ID));
end
end
dpi_trace("\n");
end
end
`endif
endmodule

View File

@@ -1,323 +0,0 @@
`include "VX_cache_define.vh"
module VX_nc_bypass #(
parameter NUM_PORTS = 1,
parameter NUM_REQS = 1,
parameter NUM_RSP_TAGS = 0,
parameter NC_TAG_BIT = 0,
parameter CORE_ADDR_WIDTH = 1,
parameter CORE_DATA_SIZE = 1,
parameter CORE_TAG_IN_WIDTH = 1,
parameter MEM_ADDR_WIDTH = 1,
parameter MEM_DATA_SIZE = 1,
parameter MEM_TAG_IN_WIDTH = 1,
parameter MEM_TAG_OUT_WIDTH = 1,
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
parameter CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
parameter MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
) (
input wire clk,
input wire reset,
// Core request in
input wire [NUM_REQS-1:0] core_req_valid_in,
input wire [NUM_REQS-1:0] core_req_rw_in,
input wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_in,
input wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_in,
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_in,
input wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_req_tag_in,
output wire [NUM_REQS-1:0] core_req_ready_in,
// Core request out
output wire [NUM_REQS-1:0] core_req_valid_out,
output wire [NUM_REQS-1:0] core_req_rw_out,
output wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_out,
output wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_out,
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_out,
output wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_req_tag_out,
input wire [NUM_REQS-1:0] core_req_ready_out,
// Core response in
input wire [NUM_RSP_TAGS-1:0] core_rsp_valid_in,
input wire [NUM_REQS-1:0] core_rsp_tmask_in,
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in,
input wire [NUM_RSP_TAGS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_rsp_tag_in,
output wire [NUM_RSP_TAGS-1:0] core_rsp_ready_in,
// Core response out
output wire [NUM_RSP_TAGS-1:0] core_rsp_valid_out,
output wire [NUM_REQS-1:0] core_rsp_tmask_out,
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out,
output wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out,
input wire [NUM_RSP_TAGS-1:0] core_rsp_ready_out,
// Memory request in
input wire mem_req_valid_in,
input wire mem_req_rw_in,
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
input wire [NUM_PORTS-1:0] mem_req_pmask_in,
input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in,
input wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in,
input wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in,
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
output wire mem_req_ready_in,
// Memory request out
output wire mem_req_valid_out,
output wire mem_req_rw_out,
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
output wire [NUM_PORTS-1:0] mem_req_pmask_out,
output wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_out,
output wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_out,
output wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_out,
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
input wire mem_req_ready_out,
// Memory response in
input wire mem_rsp_valid_in,
input wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_in,
input wire [MEM_TAG_OUT_WIDTH-1:0] mem_rsp_tag_in,
output wire mem_rsp_ready_in,
// Memory response out
output wire mem_rsp_valid_out,
output wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_out,
output wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_out,
input wire mem_rsp_ready_out
);
`STATIC_ASSERT((NUM_RSP_TAGS == 1 || NUM_RSP_TAGS == NUM_REQS), ("invalid paramter"))
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
localparam CORE_REQ_TIDW = $clog2(NUM_REQS);
localparam MUX_DATAW = CORE_TAG_IN_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1;
localparam CORE_LDATAW = $clog2(CORE_DATA_WIDTH);
localparam MEM_LDATAW = $clog2(MEM_DATA_WIDTH);
localparam D = MEM_LDATAW - CORE_LDATAW;
// core request handling
wire [NUM_REQS-1:0] core_req_valid_in_nc;
wire [NUM_REQS-1:0] core_req_nc_tids;
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire core_req_nc_valid;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_nc_tids[i] = core_req_tag_in[i][NC_TAG_BIT];
end
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_tids;
VX_priority_encoder #(
.N (NUM_REQS)
) core_req_sel (
.data_in (core_req_valid_in_nc),
.index (core_req_nc_tid),
.onehot (core_req_nc_sel),
.valid_out (core_req_nc_valid)
);
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
assign core_req_rw_out = core_req_rw_in;
assign core_req_addr_out = core_req_addr_in;
assign core_req_byteen_out = core_req_byteen_in;
assign core_req_data_out = core_req_data_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_bits_remove #(
.N (CORE_TAG_IN_WIDTH),
.S (1),
.POS (NC_TAG_BIT)
) core_req_tag_remove (
.data_in (core_req_tag_in[i]),
.data_out (core_req_tag_out[i])
);
end
if (NUM_REQS > 1) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ?
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
end
end else begin
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
end
// memory request handling
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
assign mem_req_ready_in = mem_req_ready_out;
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_c;
VX_bits_insert #(
.N (MEM_TAG_IN_WIDTH),
.S (1),
.POS (NC_TAG_BIT)
) mem_req_tag_insert (
.data_in (mem_req_tag_in),
.sel_in ('0),
.data_out (mem_req_tag_in_c)
);
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
wire core_req_rw_in_sel;
if (NUM_REQS > 1) begin
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
end
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_tid];
end else begin
assign core_req_tag_in_sel = core_req_tag_in;
assign core_req_data_in_sel = core_req_data_in;
assign core_req_byteen_in_sel = core_req_byteen_in;
assign core_req_addr_in_sel = core_req_addr_in;
assign core_req_rw_in_sel = core_req_rw_in;
end
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
if (D != 0) begin
reg [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
reg [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in_r;
reg [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[0] = core_req_byteen_in_sel;
mem_req_wsel_in_r = 'x;
mem_req_wsel_in_r[0] = req_addr_idx;
mem_req_data_in_r = 'x;
mem_req_data_in_r[0] = core_req_data_in_sel;
end
assign mem_req_pmask_out = mem_req_valid_in ? mem_req_pmask_in : NUM_PORTS'(1'b1);
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_wsel_out = mem_req_valid_in ? mem_req_wsel_in : mem_req_wsel_in_r;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
end else begin
`UNUSED_VAR (mem_req_wsel_in)
`UNUSED_VAR (mem_req_pmask_in)
assign mem_req_pmask_out = 0;
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
assign mem_req_wsel_out = 0;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
end
// core response handling
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_c;
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin
VX_bits_insert #(
.N (CORE_TAG_OUT_WIDTH),
.S (1),
.POS (NC_TAG_BIT)
) core_rsp_tag_insert (
.data_in (core_rsp_tag_in[i]),
.sel_in ('0),
.data_out (core_rsp_tag_out_c[i])
);
end
if (NUM_RSP_TAGS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW];
reg [NUM_REQS-1:0] rsp_nc_valid_r;
always @(*) begin
rsp_nc_valid_r = 0;
rsp_nc_valid_r[rsp_tid] = is_mem_rsp_nc;
end
assign core_rsp_valid_out = core_rsp_valid_in | rsp_nc_valid_r;
assign core_rsp_tmask_out = core_rsp_tmask_in;
assign core_rsp_ready_in = core_rsp_ready_out;
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_IN_WIDTH +: D];
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ?
core_rsp_data_in[i] : mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end
end else begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
end
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_c[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
end
end else begin
assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc;
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_c : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
assign core_rsp_ready_in = core_rsp_ready_out;
if (NUM_REQS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW];
reg [NUM_REQS-1:0] core_rsp_tmask_in_r;
always @(*) begin
core_rsp_tmask_in_r = 0;
core_rsp_tmask_in_r[rsp_tid] = 1;
end
assign core_rsp_tmask_out = core_rsp_valid_in ? core_rsp_tmask_in : core_rsp_tmask_in_r;
end else begin
assign core_rsp_tmask_out = core_rsp_tmask_in || is_mem_rsp_nc;
end
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_IN_WIDTH +: D];
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = core_rsp_valid_in ?
core_rsp_data_in[i] : mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end
end else begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = core_rsp_valid_in ? core_rsp_data_in[i] : mem_rsp_data_in;
end
end
end
// memory response handling
assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT];
assign mem_rsp_data_out = mem_rsp_data_in;
VX_bits_remove #(
.N (MEM_TAG_IN_WIDTH+1),
.S (1),
.POS (NC_TAG_BIT)
) mem_rsp_tag_remove (
.data_in (mem_rsp_tag_in[(MEM_TAG_IN_WIDTH+1)-1:0]),
.data_out (mem_rsp_tag_out)
);
if (NUM_RSP_TAGS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW];
assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in[rsp_tid] && core_rsp_ready_out[rsp_tid]) : mem_rsp_ready_out;
end else begin
assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in && core_rsp_ready_out) : mem_rsp_ready_out;
end
endmodule

View File

@@ -1,371 +0,0 @@
`include "VX_cache_define.vh"
module VX_shared_mem #(
parameter CACHE_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = (1024*16),
// Number of banks
parameter NUM_BANKS = 2,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Core Request Queue Size
parameter CREQ_SIZE = 2,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 8,
// core request tag size
parameter CORE_TAG_WIDTH = (2 + CORE_TAG_ID_BITS),
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = `CLOG2(256)
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
VX_perf_cache_if.master perf_cache_if,
`endif
// Core request
input wire [NUM_REQS-1:0] core_req_valid,
input wire [NUM_REQS-1:0] core_req_rw,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_REQS-1:0] core_req_ready,
// Core response
output wire core_rsp_valid,
output wire [NUM_REQS-1:0] core_rsp_tmask,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready
);
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
`UNUSED_PARAM (CACHE_ID)
`UNUSED_PARAM (CORE_TAG_ID_BITS)
localparam CACHE_LINE_SIZE = WORD_SIZE;
wire [NUM_BANKS-1:0] per_bank_core_req_valid_unqual;
wire [NUM_BANKS-1:0] per_bank_core_req_rw_unqual;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_unqual;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_unqual;
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_unqual;
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_unqual;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_unqual;
wire [NUM_BANKS-1:0] per_bank_core_req_ready_unqual;
VX_core_req_bank_sel #(
.CACHE_ID (CACHE_ID),
.CACHE_LINE_SIZE (WORD_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (1),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.BANK_ADDR_OFFSET(BANK_ADDR_OFFSET)
) core_req_bank_sel (
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.bank_stalls(perf_cache_if.bank_stalls),
`endif
.core_req_valid (core_req_valid),
.core_req_rw (core_req_rw),
.core_req_addr (core_req_addr),
.core_req_byteen (core_req_byteen),
.core_req_data (core_req_data),
.core_req_tag (core_req_tag),
.core_req_ready (core_req_ready),
.per_bank_core_req_valid (per_bank_core_req_valid_unqual),
.per_bank_core_req_tid (per_bank_core_req_tid_unqual),
.per_bank_core_req_rw (per_bank_core_req_rw_unqual),
.per_bank_core_req_addr (per_bank_core_req_addr_unqual),
.per_bank_core_req_byteen(per_bank_core_req_byteen_unqual),
.per_bank_core_req_tag (per_bank_core_req_tag_unqual),
.per_bank_core_req_data (per_bank_core_req_data_unqual),
.per_bank_core_req_ready (per_bank_core_req_ready_unqual),
`UNUSED_PIN (per_bank_core_req_pmask),
`UNUSED_PIN (per_bank_core_req_wsel)
);
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire creq_out_valid, creq_out_ready;
wire creq_in_valid, creq_in_ready;
wire creq_in_fire = creq_in_valid && creq_in_ready;
`UNUSED_VAR (creq_in_fire)
wire creq_out_fire = creq_out_valid && creq_out_ready;
`UNUSED_VAR (creq_out_fire)
assign creq_in_valid = (| core_req_valid);
assign per_bank_core_req_ready_unqual = {NUM_BANKS{creq_in_ready}};
wire [NUM_BANKS-1:0] core_req_read_mask, core_req_read_mask_unqual;
wire core_req_writeonly, core_req_writeonly_unqual;
assign core_req_read_mask_unqual = per_bank_core_req_valid_unqual & ~per_bank_core_req_rw_unqual;
assign core_req_writeonly_unqual = ~(| core_req_read_mask_unqual);
VX_elastic_buffer #(
.DATAW (NUM_BANKS * (1 + 1 + `LINE_ADDR_WIDTH + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS) + NUM_BANKS + 1),
.SIZE (CREQ_SIZE),
.OUT_REG (1) // output should be registered for the data_store addr port
) core_req_queue (
.clk (clk),
.reset (reset),
.ready_in (creq_in_ready),
.valid_in (creq_in_valid),
.data_in ({per_bank_core_req_valid_unqual,
per_bank_core_req_rw_unqual,
per_bank_core_req_addr_unqual,
per_bank_core_req_byteen_unqual,
per_bank_core_req_data_unqual,
per_bank_core_req_tag_unqual,
per_bank_core_req_tid_unqual,
core_req_read_mask_unqual,
core_req_writeonly_unqual}),
.data_out ({per_bank_core_req_valid,
per_bank_core_req_rw,
per_bank_core_req_addr,
per_bank_core_req_byteen,
per_bank_core_req_data,
per_bank_core_req_tag,
per_bank_core_req_tid,
core_req_read_mask,
core_req_writeonly}),
.ready_out (creq_out_ready),
.valid_out (creq_out_valid)
);
wire crsq_in_valid, crsq_in_ready;
wire crsq_last_read;
assign creq_out_ready = core_req_writeonly
|| (crsq_in_ready && crsq_last_read);
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [WORD_SIZE-1:0] wren = per_bank_core_req_byteen[i]
& {WORD_SIZE{per_bank_core_req_valid[i]
&& per_bank_core_req_rw[i]}};
wire [`LINE_SELECT_BITS-1:0] addr = per_bank_core_req_addr[i][`LINE_SELECT_BITS-1:0];
VX_sp_ram #(
.DATAW (`WORD_WIDTH),
.SIZE (`LINES_PER_BANK),
.BYTEENW (WORD_SIZE),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.addr (addr),
.wren (wren),
.wdata (per_bank_core_req_data[i]),
.rdata (per_bank_core_rsp_data[i])
);
end
// The core response bus handles a single tag at the time
// We first need to select the current tag to process,
// then send all bank responses for that tag as a batch
reg [NUM_REQS-1:0] core_rsp_valids_in;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
wire [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
reg [NUM_BANKS-1:0] bank_rsp_sel_r, bank_rsp_sel_n;
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
assign crsq_last_read = (bank_rsp_sel_n == core_req_read_mask);
always @(posedge clk) begin
if (reset) begin
bank_rsp_sel_r <= 0;
end else begin
if (crsq_in_fire) begin
if (crsq_last_read) begin
bank_rsp_sel_r <= 0;
end else begin
bank_rsp_sel_r <= bank_rsp_sel_n;
end
end
end
end
VX_find_first #(
.N (NUM_BANKS),
.DATAW (CORE_TAG_WIDTH)
) find_first (
.valid_i (core_req_read_mask & ~bank_rsp_sel_r),
.data_i (per_bank_core_req_tag),
.data_o (core_rsp_tag_in),
`UNUSED_PIN (valid_o)
);
always @(*) begin
core_rsp_valids_in = 0;
core_rsp_data_in = 'x;
bank_rsp_sel_n = bank_rsp_sel_r;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (core_req_read_mask[i]
&& (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
bank_rsp_sel_n[i] = 1;
end
end
end
assign crsq_in_valid = creq_out_valid && ~core_req_writeonly;
VX_elastic_buffer #(
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
.SIZE (CRSQ_SIZE)
) core_rsp_req (
.clk (clk),
.reset (reset),
.valid_in (crsq_in_valid),
.data_in ({core_rsp_valids_in, core_rsp_data_in, core_rsp_tag_in}),
.ready_in (crsq_in_ready),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tmask, core_rsp_data, core_rsp_tag}),
.ready_out (core_rsp_ready)
);
`IGNORE_UNUSED_BEGIN
wire [NUM_BANKS-1:0][`DBG_CACHE_REQ_IDW-1:0] req_id_st0, req_id_st1;
`IGNORE_UNUSED_END
for (genvar i = 0; i < NUM_BANKS; ++i) begin
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign req_id_st0[i] = per_bank_core_req_tag_unqual[i][`CACHE_REQ_ID_RNG];
assign req_id_st1[i] = per_bank_core_req_tag[i][`CACHE_REQ_ID_RNG];
end else begin
assign req_id_st0[i] = 0;
assign req_id_st1[i] = 0;
end
end
`ifdef DBG_TRACE_CACHE_BANK
reg is_multi_tag_req;
`IGNORE_UNUSED_BEGIN
reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel;
`IGNORE_UNUSED_END
VX_find_first #(
.N (NUM_BANKS),
.DATAW (CORE_TAG_WIDTH)
) find_first_d (
.valid_i (per_bank_core_req_valid),
.data_i (per_bank_core_req_tag),
.data_o (core_req_tag_sel),
`UNUSED_PIN (valid_o)
);
always @(*) begin
is_multi_tag_req = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]
&& (core_req_tag_sel[CORE_TAG_ID_BITS-1:0] != per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
is_multi_tag_req = creq_out_valid;
end
end
end
always @(posedge clk) begin
if (!crsq_in_ready) begin
dpi_trace("%d: *** cache%0d pipeline-stall\n", $time, CACHE_ID);
end
if (is_multi_tag_req) begin
dpi_trace("%d: *** cache%0d multi-tag request!\n", $time, CACHE_ID);
end
if (creq_in_fire) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_unqual[i]) begin
if (per_bank_core_req_rw_unqual[i]) begin
dpi_trace("%d: smem%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h (#%0d)\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i], req_id_st0[i]);
end else begin
dpi_trace("%d: smem%0d:%0d core-rd-req: addr=%0h, tag=%0h (#%0d)\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], req_id_st0[i]);
end
end
end
end
if (creq_out_fire) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]) begin
if (per_bank_core_req_rw[i]) begin
dpi_trace("%d: smem%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, data=%0h (#%0d)\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_data[i], req_id_st1[i]);
end else begin
dpi_trace("%d: smem%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, data=%0h (#%0d)\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_rsp_data[i], req_id_st1[i]);
end
end
end
end
end
`endif
`ifdef PERF_ENABLE
// per cycle: core_reads, core_writes
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
wire perf_crsp_stall_per_cycle = core_rsp_valid & ~core_rsp_ready;
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
always @(posedge clk) begin
if (reset) begin
perf_core_reads <= 0;
perf_core_writes <= 0;
perf_crsp_stalls <= 0;
end else begin
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
end
end
assign perf_cache_if.reads = perf_core_reads;
assign perf_cache_if.writes = perf_core_writes;
assign perf_cache_if.read_misses = '0;
assign perf_cache_if.write_misses = '0;
assign perf_cache_if.mshr_stalls = '0;
assign perf_cache_if.mem_stalls = '0;
assign perf_cache_if.crsp_stalls = perf_crsp_stalls;
`endif
endmodule

View File

@@ -1,79 +0,0 @@
`include "VX_cache_define.vh"
module VX_tag_access #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0
) (
input wire clk,
input wire reset,
`IGNORE_UNUSED_BEGIN
input wire[`DBG_CACHE_REQ_IDW-1:0] req_id,
`IGNORE_UNUSED_END
input wire stall,
// read/fill
input wire lookup,
input wire[`LINE_ADDR_WIDTH-1:0] addr,
input wire fill,
input wire flush,
output wire tag_match
);
`UNUSED_PARAM (CACHE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_VAR (reset)
`UNUSED_VAR (lookup)
wire [`TAG_SELECT_BITS-1:0] read_tag;
wire read_valid;
wire [`LINE_SELECT_BITS-1:0] line_addr = addr[`LINE_SELECT_BITS-1:0];
wire [`TAG_SELECT_BITS-1:0] line_tag = `LINE_TAG_ADDR(addr);
VX_sp_ram #(
.DATAW (`TAG_SELECT_BITS + 1),
.SIZE (`LINES_PER_BANK),
.NO_RWCHECK (1)
) tag_store (
.clk( clk),
.addr (line_addr),
.wren (fill || flush),
.wdata ({!flush, line_tag}),
.rdata ({read_valid, read_tag})
);
assign tag_match = read_valid && (line_tag == read_tag);
`UNUSED_VAR (stall)
`ifdef DBG_TRACE_CACHE_TAG
always @(posedge clk) begin
if (fill && ~stall) begin
dpi_trace("%d: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag);
end
if (flush) begin
dpi_trace("%d: cache%0d:%0d tag-flush: addr=%0h, blk_addr=%0d\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr);
end
if (lookup && ~stall) begin
if (tag_match) begin
dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, blk_addr=%0d, tag_id=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag, req_id);
end else begin
dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag, read_tag, req_id);
end
end
end
`endif
endmodule