specialized shared memory module

This commit is contained in:
Blaise Tine
2021-01-16 04:41:58 -08:00
parent ad6e0b4e77
commit fcbf57b66a
12 changed files with 395 additions and 295 deletions

View File

@@ -4,7 +4,7 @@ SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
POCL_CC_PATH ?= /opt/pocl/compiler POCL_CC_PATH ?= /opt/pocl/compiler
POCL_RT_PATH ?= /opt/pocl/runtime POCL_RT_PATH ?= /opt/pocl/runtime
OPTS ?= -n64 OPTS ?= -n32
VORTEX_DRV_PATH ?= $(realpath ../../../driver) VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime) VORTEX_RT_PATH ?= $(realpath ../../../runtime)

View File

@@ -101,7 +101,7 @@ static void cleanup() {
if (h_c) free(h_c); if (h_c) free(h_c);
} }
int size = 64; int size = 32;
static void show_usage() { static void show_usage() {
printf("Usage: [-n size] [-h: help]\n"); printf("Usage: [-n size] [-h: help]\n");

View File

@@ -172,7 +172,6 @@ module VX_cluster #(
.DRSQ_SIZE (`L2DRSQ_SIZE), .DRSQ_SIZE (`L2DRSQ_SIZE),
.CRSQ_SIZE (`L2CRSQ_SIZE), .CRSQ_SIZE (`L2CRSQ_SIZE),
.DREQ_SIZE (`L2DREQ_SIZE), .DREQ_SIZE (`L2DREQ_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`XDRAM_TAG_WIDTH), .CORE_TAG_WIDTH (`XDRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0), .CORE_TAG_ID_BITS (0),

View File

@@ -299,14 +299,11 @@
// Cache ID // Cache ID
`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2) `define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
// Block size in bytes
`define SCACHE_LINE_SIZE 4
// Word size in bytes // Word size in bytes
`define SWORD_SIZE 4 `define SWORD_SIZE 4
// bank address offset // bank address offset
`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SCACHE_LINE_SIZE) `define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SWORD_SIZE)
// Core request size // Core request size
`define SNUM_REQUESTS `NUM_THREADS `define SNUM_REQUESTS `NUM_THREADS

View File

@@ -103,7 +103,6 @@ module VX_mem_unit # (
.DRSQ_SIZE (`IDRSQ_SIZE), .DRSQ_SIZE (`IDRSQ_SIZE),
.CRSQ_SIZE (`ICRSQ_SIZE), .CRSQ_SIZE (`ICRSQ_SIZE),
.DREQ_SIZE (`IDREQ_SIZE), .DREQ_SIZE (`IDREQ_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (0), .WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH), .CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
@@ -160,8 +159,7 @@ module VX_mem_unit # (
.MSHR_SIZE (`DMSHR_SIZE), .MSHR_SIZE (`DMSHR_SIZE),
.DRSQ_SIZE (`DDRSQ_SIZE), .DRSQ_SIZE (`DDRSQ_SIZE),
.CRSQ_SIZE (`DCRSQ_SIZE), .CRSQ_SIZE (`DCRSQ_SIZE),
.DREQ_SIZE (`DDREQ_SIZE), .DREQ_SIZE (`DDREQ_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
@@ -215,22 +213,16 @@ module VX_mem_unit # (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.reset_out (scache_reset) .reset_out (scache_reset)
); );
VX_cache #( VX_shared_mem #(
.CACHE_ID (`SCACHE_ID), .CACHE_ID (`SCACHE_ID),
.CACHE_SIZE (`SMEM_SIZE), .CACHE_SIZE (`SMEM_SIZE),
.CACHE_LINE_SIZE (`SCACHE_LINE_SIZE),
.NUM_BANKS (`SNUM_BANKS), .NUM_BANKS (`SNUM_BANKS),
.WORD_SIZE (`SWORD_SIZE), .WORD_SIZE (`SWORD_SIZE),
.NUM_REQS (`SNUM_REQUESTS), .NUM_REQS (`SNUM_REQUESTS),
.CREQ_SIZE (`SCREQ_SIZE), .CREQ_SIZE (`SCREQ_SIZE),
.MSHR_SIZE (8),
.DRSQ_SIZE (1),
.CRSQ_SIZE (`SCRSQ_SIZE), .CRSQ_SIZE (`SCRSQ_SIZE),
.DREQ_SIZE (1),
.DRAM_ENABLE (0),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET) .BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET)
@@ -240,6 +232,10 @@ module VX_mem_unit # (
.clk (clk), .clk (clk),
.reset (scache_reset), .reset (scache_reset),
`ifdef PERF_ENABLE
.perf_cache_if (perf_smem_if),
`endif
// Core request // Core request
.core_req_valid (smem_req_if.valid), .core_req_valid (smem_req_if.valid),
.core_req_rw (smem_req_if.rw), .core_req_rw (smem_req_if.rw),
@@ -253,26 +249,7 @@ module VX_mem_unit # (
.core_rsp_valid (smem_rsp_if.valid), .core_rsp_valid (smem_rsp_if.valid),
.core_rsp_data (smem_rsp_if.data), .core_rsp_data (smem_rsp_if.data),
.core_rsp_tag (smem_rsp_if.tag), .core_rsp_tag (smem_rsp_if.tag),
.core_rsp_ready (smem_rsp_if.ready), .core_rsp_ready (smem_rsp_if.ready)
`ifdef PERF_ENABLE
.perf_cache_if (perf_smem_if),
`endif
// DRAM request
`UNUSED_PIN (dram_req_valid),
`UNUSED_PIN (dram_req_rw),
`UNUSED_PIN (dram_req_byteen),
`UNUSED_PIN (dram_req_addr),
`UNUSED_PIN (dram_req_data),
`UNUSED_PIN (dram_req_tag),
.dram_req_ready (1'b0),
// DRAM response
.dram_rsp_valid (0),
.dram_rsp_data (0),
.dram_rsp_tag (0),
`UNUSED_PIN (dram_rsp_ready)
); );
end end

View File

@@ -174,7 +174,6 @@ module Vortex (
.DRSQ_SIZE (`L3DRSQ_SIZE), .DRSQ_SIZE (`L3DRSQ_SIZE),
.CRSQ_SIZE (`L3CRSQ_SIZE), .CRSQ_SIZE (`L3CRSQ_SIZE),
.DREQ_SIZE (`L3DREQ_SIZE), .DREQ_SIZE (`L3DREQ_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0), .CORE_TAG_ID_BITS (0),

302
hw/rtl/cache/VX_bank.v vendored
View File

@@ -27,9 +27,6 @@ module VX_bank #(
// DRAM Request Queue Size // DRAM Request Queue Size
parameter DREQ_SIZE = 1, parameter DREQ_SIZE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1,
// Enable cache writeable // Enable cache writeable
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
@@ -103,38 +100,27 @@ module VX_bank #(
wire drsq_push = dram_rsp_valid && dram_rsp_ready; wire drsq_push = dram_rsp_valid && dram_rsp_ready;
if (DRAM_ENABLE) begin wire drsq_full;
wire drsq_full; assign dram_rsp_ready = !drsq_full;
assign dram_rsp_ready = !drsq_full;
VX_fifo_queue_xt #( VX_fifo_queue_xt #(
.DATAW (`LINE_ADDR_WIDTH + $bits(dram_rsp_data)), .DATAW (`LINE_ADDR_WIDTH + $bits(dram_rsp_data)),
.SIZE (DRSQ_SIZE), .SIZE (DRSQ_SIZE),
.FASTRAM (1) .FASTRAM (1)
) dram_rsp_queue ( ) dram_rsp_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (drsq_push), .push (drsq_push),
.pop (drsq_pop), .pop (drsq_pop),
.data_in ({dram_rsp_addr, dram_rsp_data}), .data_in ({dram_rsp_addr, dram_rsp_data}),
`UNUSED_PIN (data_out), `UNUSED_PIN (data_out),
.empty (drsq_empty), .empty (drsq_empty),
.data_out_next ({drsq_addr_next, drsq_filldata_next}), .data_out_next ({drsq_addr_next, drsq_filldata_next}),
.empty_next (drsq_empty_next), .empty_next (drsq_empty_next),
.full (drsq_full), .full (drsq_full),
`UNUSED_PIN (almost_full), `UNUSED_PIN (almost_full),
`UNUSED_PIN (size) `UNUSED_PIN (size)
); );
end else begin
`UNUSED_VAR (dram_rsp_valid)
`UNUSED_VAR (dram_rsp_addr)
`UNUSED_VAR (dram_rsp_data)
assign drsq_empty = 1;
assign drsq_empty_next = 1;
assign drsq_addr_next = 0;
assign drsq_filldata_next = 0;
assign dram_rsp_ready = 0;
end
wire creq_pop; wire creq_pop;
wire creq_full, creq_empty; wire creq_full, creq_empty;
@@ -221,14 +207,6 @@ module VX_bank #(
wire dreq_push_unqual_st0, dreq_push_unqual_st1; wire dreq_push_unqual_st0, dreq_push_unqual_st1;
wire writeen_st1; wire writeen_st1;
wire core_req_hit_st1; wire core_req_hit_st1;
wire valid_st01;
wire writeen_st01;
wire [`LINE_ADDR_WIDTH-1:0] addr_st01;
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st01;
wire [WORD_SIZE-1:0] byteen_st01;
wire [`WORD_WIDTH-1:0] writeword_st01;
wire [`REQ_TAG_WIDTH-1:0] tag_st01;
wire mshr_push_stall; wire mshr_push_stall;
wire crsq_push_stall; wire crsq_push_stall;
@@ -278,8 +256,7 @@ module VX_bank #(
assign {debug_pc_st0, debug_wid_st0} = 0; assign {debug_pc_st0, debug_wid_st0} = 0;
end end
`endif `endif
if (DRAM_ENABLE) begin
VX_tag_access #( VX_tag_access #(
.BANK_ID (BANK_ID), .BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID), .CACHE_ID (CACHE_ID),
@@ -290,7 +267,7 @@ if (DRAM_ENABLE) begin
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET) .BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
) tag_access ( ) tag_access (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -314,66 +291,28 @@ if (DRAM_ENABLE) begin
.writeen_in (valid_st1 && writeen_st1) .writeen_in (valid_st1 && writeen_st1)
); );
assign valid_st01 = valid_st1;
assign writeen_st01 = writeen_st1;
assign addr_st01 = addr_st1;
assign wsel_st01 = wsel_st1;
assign byteen_st01 = byteen_st1;
assign writeword_st01 = writeword_st1;
assign tag_st01 = tag_st1;
// redundant fills // redundant fills
wire is_redundant_fill = is_fill_st0 && !miss_st0; wire is_redundant_fill = is_fill_st0 && !miss_st0;
// we have a miss in mshr or going to it for the current address // we have a miss in mshr or going to it for the current address
wire mshr_pending_st0 = mshr_pending_unqual_st0 wire mshr_pending_st0 = mshr_pending_unqual_st0
|| (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1)); || (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1));
// force miss to ensure commit order when a new request has pending previous requests to same block // force miss to ensure commit order when a new request has pending previous requests to same block
assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_st0; assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_st0;
assign writeen_unqual_st0 = (!is_fill_st0 && !miss_st0 && mem_rw_st0) assign writeen_unqual_st0 = (!is_fill_st0 && !miss_st0 && mem_rw_st0)
|| (is_fill_st0 && !is_redundant_fill); || (is_fill_st0 && !is_redundant_fill);
wire send_fill_req_st0 = !is_fill_st0 && miss_st0 wire send_fill_req_st0 = !is_fill_st0 && miss_st0
&& !(WRITE_THROUGH && mem_rw_st0); && !(WRITE_THROUGH && mem_rw_st0);
assign do_writeback_st0 = (WRITE_THROUGH && !is_fill_st0 && mem_rw_st0) assign do_writeback_st0 = (WRITE_THROUGH && !is_fill_st0 && mem_rw_st0)
|| (!WRITE_THROUGH && is_fill_st0 && dirty_st0 && !is_redundant_fill); || (!WRITE_THROUGH && is_fill_st0 && dirty_st0 && !is_redundant_fill);
assign dreq_push_unqual_st0 = send_fill_req_st0 || do_writeback_st0; assign dreq_push_unqual_st0 = send_fill_req_st0 || do_writeback_st0;
assign mshr_push_unqual_st0 = !is_fill_st0 && !(WRITE_THROUGH && mem_rw_st0); assign mshr_push_unqual_st0 = !is_fill_st0 && !(WRITE_THROUGH && mem_rw_st0);
end else begin
`UNUSED_VAR (mshr_pending_unqual_st0)
`UNUSED_VAR (drsq_push)
`UNUSED_VAR (dirty_st0)
`UNUSED_VAR (writeen_st1)
`ifdef DBG_CACHE_REQ_INFO
assign debug_pc_st1 = debug_pc_st0;
assign debug_wid_st1 = debug_wid_st0;
`endif
assign valid_st01 = valid_st0;
assign writeen_st01 = mem_rw_st0;
assign addr_st01 = addr_st0;
assign wsel_st01 = wsel_st0;
assign byteen_st01 = byteen_st0;
assign writeword_st01 = writeword_st0;
assign tag_st01 = tag_st0;
assign miss_st0 = 0;
assign dirty_st0 = 0;
assign force_miss_st0 = 0;
assign readtag_st0 = 0;
assign do_writeback_st0 = 0;
assign writeen_unqual_st0 = mem_rw_st0;
assign dreq_push_unqual_st0 = 0;
assign mshr_push_unqual_st0 = 0;
end
VX_pipe_register #( VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
@@ -403,7 +342,6 @@ end
assign {debug_pc_st01, debug_wid_st01} = 0; assign {debug_pc_st01, debug_wid_st01} = 0;
end end
`endif `endif
`UNUSED_VAR (tag_st01)
VX_data_access #( VX_data_access #(
.BANK_ID (BANK_ID), .BANK_ID (BANK_ID),
@@ -412,7 +350,6 @@ end
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE), .CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.DRAM_ENABLE (DRAM_ENABLE),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITE_THROUGH (WRITE_THROUGH) .WRITE_THROUGH (WRITE_THROUGH)
@@ -435,12 +372,12 @@ end
.dirtyb_out (dirtyb_st0), .dirtyb_out (dirtyb_st0),
// writing // writing
.writeen_in (valid_st01 && writeen_st01), .writeen_in (valid_st1 && writeen_st1),
.waddr_in (addr_st01), .waddr_in (addr_st1),
.wfill_in (is_fill_st1), .wfill_in (is_fill_st1),
.wwsel_in (wsel_st01), .wwsel_in (wsel_st1),
.wbyteen_in (byteen_st01), .wbyteen_in (byteen_st1),
.writeword_in (writeword_st01), .writeword_in (writeword_st1),
.filldata_in (filldata_st1) .filldata_in (filldata_st1)
); );
@@ -461,81 +398,59 @@ end
wire incoming_fill_st1 = valid_st0 && is_fill_st0 && (addr_st1 == addr_st0); wire incoming_fill_st1 = valid_st0 && is_fill_st0 && (addr_st1 == addr_st0);
if (DRAM_ENABLE) begin wire mshr_dequeue_st1 = valid_st1 && is_mshr_st1 && !mshr_push_unqual && !pipeline_stall;
wire mshr_dequeue_st1 = valid_st1 && is_mshr_st1 && !mshr_push_unqual && !pipeline_stall; // push a missed request as 'ready' if it was a forced miss that actually had a hit
// or the fill request for this block is coming
wire mshr_init_ready_state_st1 = !miss_st1 || incoming_fill_st1;
// push a missed request as 'ready' if it was a forced miss that actually had a hit VX_miss_resrv #(
// or the fill request for this block is coming .BANK_ID (BANK_ID),
wire mshr_init_ready_state_st1 = !miss_st1 || incoming_fill_st1; .CACHE_ID (CACHE_ID),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.MSHR_SIZE (MSHR_SIZE),
.ALM_FULL (MSHR_SIZE-1),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
) miss_resrv (
.clk (clk),
.reset (reset),
VX_miss_resrv #( `ifdef DBG_CACHE_REQ_INFO
.BANK_ID (BANK_ID), .deq_debug_pc (debug_pc_st0),
.CACHE_ID (CACHE_ID), .deq_debug_wid (debug_wid_st0),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .enq_debug_pc (debug_pc_st1),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE), .enq_debug_wid (debug_wid_st1),
.NUM_BANKS (NUM_BANKS), `endif
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.MSHR_SIZE (MSHR_SIZE),
.ALM_FULL (MSHR_SIZE-1),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
) miss_resrv (
.clk (clk),
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO // enqueue
.deq_debug_pc (debug_pc_st0), .enqueue (mshr_push),
.deq_debug_wid (debug_wid_st0), .enqueue_addr (addr_st1),
.enq_debug_pc (debug_pc_st1), .enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}),
.enq_debug_wid (debug_wid_st1), .enqueue_is_mshr (is_mshr_st1),
`endif .enqueue_as_ready (mshr_init_ready_state_st1),
.enqueue_almfull (mshr_almost_full),
// enqueue // lookup
.enqueue (mshr_push), .lookup_ready (drsq_pop),
.enqueue_addr (addr_st1), .lookup_addr (addr_st0),
.enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}), .lookup_match (mshr_pending_unqual_st0),
.enqueue_is_mshr (is_mshr_st1),
.enqueue_as_ready (mshr_init_ready_state_st1), // schedule
.enqueue_almfull (mshr_almost_full), .schedule (mshr_pop),
.schedule_valid (mshr_valid),
`UNUSED_PIN (schedule_addr),
`UNUSED_PIN (schedule_data),
.schedule_valid_next(mshr_valid_next),
.schedule_addr_next (mshr_addr_next),
.schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}),
// lookup // dequeue
.lookup_ready (drsq_pop), .dequeue (mshr_dequeue_st1)
.lookup_addr (addr_st0), );
.lookup_match (mshr_pending_unqual_st0),
// schedule
.schedule (mshr_pop),
.schedule_valid (mshr_valid),
`UNUSED_PIN (schedule_addr),
`UNUSED_PIN (schedule_data),
.schedule_valid_next(mshr_valid_next),
.schedule_addr_next (mshr_addr_next),
.schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}),
// dequeue
.dequeue (mshr_dequeue_st1)
);
end else begin
`UNUSED_VAR (valid_st1)
`UNUSED_VAR (mshr_push)
`UNUSED_VAR (wsel_st1)
`UNUSED_VAR (writeword_st1)
`UNUSED_VAR (mem_rw_st1)
`UNUSED_VAR (byteen_st1)
`UNUSED_VAR (incoming_fill_st1)
assign mshr_almost_full = 0;
assign mshr_pending_unqual_st0 = 0;
assign mshr_valid = 0;
assign mshr_valid_next = 0;
assign mshr_addr_next = 0;
assign mshr_wsel_next = 0;
assign mshr_writeword_next = 0;
assign mshr_tid_next = 0;
assign mshr_tag_next = 0;
assign mshr_rw_next = 0;
assign mshr_byteen_next = 0;
end
// Enqueue core response // Enqueue core response
@@ -625,44 +540,25 @@ end
assign dreq_byteen = writeback ? dreq_byteen_unqual : {CACHE_LINE_SIZE{1'b1}}; assign dreq_byteen = writeback ? dreq_byteen_unqual : {CACHE_LINE_SIZE{1'b1}};
if (DRAM_ENABLE) begin VX_fifo_queue_xt #(
VX_fifo_queue_xt #( .DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH), .SIZE (DREQ_SIZE),
.SIZE (DREQ_SIZE), .ALM_FULL (DREQ_SIZE-1),
.ALM_FULL (DREQ_SIZE-1), .FASTRAM (1)
.FASTRAM (1) ) dram_req_queue (
) dram_req_queue ( .clk (clk),
.clk (clk), .reset (reset),
.reset (reset), .push (dreq_push),
.push (dreq_push), .pop (dreq_pop),
.pop (dreq_pop), .data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}),
.data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}), .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
.data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), .empty (dreq_empty),
.empty (dreq_empty), .almost_full (dreq_almost_full),
.almost_full (dreq_almost_full), `UNUSED_PIN (full),
`UNUSED_PIN (full), `UNUSED_PIN (data_out_next),
`UNUSED_PIN (data_out_next), `UNUSED_PIN (empty_next),
`UNUSED_PIN (empty_next), `UNUSED_PIN (size)
`UNUSED_PIN (size) );
);
end else begin
`UNUSED_VAR (dreq_push)
`UNUSED_VAR (dreq_pop)
`UNUSED_VAR (dreq_addr)
`UNUSED_VAR (dreq_data)
`UNUSED_VAR (dreq_byteen)
`UNUSED_VAR (readtag_st1)
`UNUSED_VAR (dirtyb_st1)
`UNUSED_VAR (readdata_st1)
`UNUSED_VAR (writeback)
`UNUSED_VAR (dram_req_ready)
assign dreq_empty = 1;
assign dreq_almost_full = 0;
assign dram_req_rw = 0;
assign dram_req_byteen = 0;
assign dram_req_addr = 0;
assign dram_req_data = 0;
end
assign dram_req_valid = !dreq_empty; assign dram_req_valid = !dreq_empty;

View File

@@ -24,10 +24,7 @@ module VX_cache #(
// Core Response Queue Size // Core Response Queue Size
parameter CRSQ_SIZE = 4, parameter CRSQ_SIZE = 4,
// DRAM Request Queue Size // DRAM Request Queue Size
parameter DREQ_SIZE = 4, parameter DREQ_SIZE = 4,
// Enable dram update
parameter DRAM_ENABLE = 1,
// Enable cache writeable // Enable cache writeable
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
@@ -129,8 +126,8 @@ module VX_cache #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.BANK_ADDR_OFFSET(BANK_ADDR_OFFSET), .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET),
.BUFFERED ((NUM_BANKS > 1) && DRAM_ENABLE) .BUFFERED (NUM_BANKS > 1)
) cache_core_req_bank_sel ( ) core_req_bank_sel (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
@@ -244,7 +241,6 @@ module VX_cache #(
.DRSQ_SIZE (DRSQ_SIZE), .DRSQ_SIZE (DRSQ_SIZE),
.CRSQ_SIZE (CRSQ_SIZE), .CRSQ_SIZE (CRSQ_SIZE),
.DREQ_SIZE (DREQ_SIZE), .DREQ_SIZE (DREQ_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.WRITE_THROUGH (WRITE_THROUGH), .WRITE_THROUGH (WRITE_THROUGH),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_WIDTH (CORE_TAG_WIDTH),
@@ -302,7 +298,7 @@ module VX_cache #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_rsp_merge ( ) core_rsp_merge (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.per_bank_core_rsp_valid (per_bank_core_rsp_valid), .per_bank_core_rsp_valid (per_bank_core_rsp_valid),
@@ -316,41 +312,26 @@ module VX_cache #(
.core_rsp_ready (core_rsp_ready) .core_rsp_ready (core_rsp_ready)
); );
if (DRAM_ENABLE) begin wire [NUM_BANKS-1:0][(`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
wire [NUM_BANKS-1:0][(`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; i++) begin
for (genvar i = 0; i < NUM_BANKS; i++) begin assign data_in[i] = {per_bank_dram_req_addr[i], per_bank_dram_req_rw[i], per_bank_dram_req_byteen[i], per_bank_dram_req_data[i]};
assign data_in[i] = {per_bank_dram_req_addr[i], per_bank_dram_req_rw[i], per_bank_dram_req_byteen[i], per_bank_dram_req_data[i]};
end
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
.BUFFERED (1)
) dram_req_arb (
.clk (clk),
.reset (reset),
.valid_in (per_bank_dram_req_valid),
.data_in (data_in),
.ready_in (per_bank_dram_req_ready),
.valid_out (dram_req_valid),
.data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}),
.ready_out (dram_req_ready)
);
end else begin
`UNUSED_VAR (per_bank_dram_req_valid)
`UNUSED_VAR (per_bank_dram_req_rw)
`UNUSED_VAR (per_bank_dram_req_byteen)
`UNUSED_VAR (per_bank_dram_req_addr)
`UNUSED_VAR (per_bank_dram_req_data)
assign per_bank_dram_req_ready = 0;
assign dram_req_valid = 0;
assign dram_req_rw = 0;
assign dram_req_byteen = 0;
assign dram_req_addr = 0;
assign dram_req_data = 0;
`UNUSED_VAR (dram_req_ready)
end end
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
.BUFFERED (1)
) dram_req_arb (
.clk (clk),
.reset (reset),
.valid_in (per_bank_dram_req_valid),
.data_in (data_in),
.ready_in (per_bank_dram_req_ready),
.valid_out (dram_req_valid),
.data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}),
.ready_out (dram_req_ready)
);
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
// per cycle: core_reads, core_writes // per cycle: core_reads, core_writes
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle; reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;

View File

@@ -112,7 +112,7 @@ module VX_cache_core_req_bank_sel #(
end end
for (genvar i = 0; i < NUM_BANKS; ++i) begin for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_core_req_stall[i] = ~per_bank_core_req_ready[i] & per_bank_core_req_valid[i]; assign per_bank_core_req_stall[i] = ~per_bank_core_req_ready[i] && (!BUFFERED || per_bank_core_req_valid[i]);
VX_pipe_register #( VX_pipe_register #(
.DATAW (1 + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + CORE_TAG_WIDTH + `WORD_WIDTH), .DATAW (1 + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + CORE_TAG_WIDTH + `WORD_WIDTH),
.RESETW (1), .RESETW (1),

View File

@@ -50,7 +50,6 @@ module VX_cache_core_rsp_merge #(
for (integer i = 0; i < NUM_BANKS; i++) begin for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]) begin if (per_bank_core_rsp_valid[i]) begin
core_rsp_tag_unqual = per_bank_core_rsp_tag[i]; core_rsp_tag_unqual = per_bank_core_rsp_tag[i];
break;
end end
end end

View File

@@ -2,8 +2,7 @@
module VX_data_access #( module VX_data_access #(
parameter CACHE_ID = 0, parameter CACHE_ID = 0,
parameter BANK_ID = 0, parameter BANK_ID = 0,
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE = 1, parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
@@ -11,17 +10,11 @@ module VX_data_access #(
// Number of banks // Number of banks
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE = 1, parameter WORD_SIZE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1,
// Enable cache writeable // Enable cache writeable
parameter WRITE_ENABLE = 1, parameter WRITE_ENABLE = 1,
// Enable write-through // Enable write-through
parameter WRITE_THROUGH = 1, parameter WRITE_THROUGH = 1,
// size of tag id in core request tag // size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0 parameter CORE_TAG_ID_BITS = 0
) ( ) (
@@ -111,7 +104,7 @@ module VX_data_access #(
assign byte_enable = wfill_in ? {CACHE_LINE_SIZE{1'b1}} : wbyteen_qual; assign byte_enable = wfill_in ? {CACHE_LINE_SIZE{1'b1}} : wbyteen_qual;
assign write_data = wfill_in ? filldata_in : writedata_qual; assign write_data = wfill_in ? filldata_in : writedata_qual;
wire rw_hazard = DRAM_ENABLE && (raddr == waddr) && writeen_in; wire rw_hazard = (raddr == waddr) && writeen_in;
if (`WORD_SELECT_BITS != 0) begin if (`WORD_SELECT_BITS != 0) begin
for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin

259
hw/rtl/cache/VX_shared_mem.v vendored Normal file
View File

@@ -0,0 +1,259 @@
`include "VX_cache_config.vh"
// Banked shared memory exposing the same core-side request/response ports as
// VX_cache, but with no tag lookup, miss handling or DRAM interface: every
// bank is a plain single-port RAM holding one word per line
// (CACHE_LINE_SIZE == WORD_SIZE).
//
// Data flow:
//   1. VX_cache_core_req_bank_sel (unbuffered) maps the NUM_REQS core requests
//      onto NUM_BANKS per-bank requests.
//   2. The per-bank request bundle is pushed into core_req_queue whenever any
//      core request is valid and the queue is not full.
//   3. The queue head drives one VX_sp_ram per bank. The head is popped as
//      soon as the response queue has room (creq_pop).
//   4. For read bundles, the RAM outputs are scattered back to the requesting
//      lanes (by tid) and pushed into core_rsp_queue. Write bundles generate
//      no response.
module VX_shared_mem #(
    parameter CACHE_ID          = 0,

    // Size of cache in bytes
    parameter CACHE_SIZE        = 16384,
    // Number of banks
    parameter NUM_BANKS         = 4,
    // Size of a word in bytes
    parameter WORD_SIZE         = 4,
    // Number of Word requests per cycle
    parameter NUM_REQS          = NUM_BANKS,

    // Core Request Queue Size
    parameter CREQ_SIZE         = 4,
    // Core Response Queue Size
    parameter CRSQ_SIZE         = 4,

    // core request tag size
    parameter CORE_TAG_WIDTH    = 1,

    // size of tag id in core request tag
    parameter CORE_TAG_ID_BITS  = 0,

    // bank offset from beginning of index range
    parameter BANK_ADDR_OFFSET  = 0
) (
    `SCOPE_IO_VX_cache

    input wire clk,
    input wire reset,

    // PERF
`ifdef PERF_ENABLE
    VX_perf_cache_if                        perf_cache_if,
`endif

    // Core request
    input wire [NUM_REQS-1:0]                       core_req_valid,
    input wire [NUM_REQS-1:0]                       core_req_rw,
    input wire [NUM_REQS-1:0][WORD_SIZE-1:0]        core_req_byteen,
    input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
    input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]      core_req_data,
    input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0]   core_req_tag,
    output wire [NUM_REQS-1:0]                      core_req_ready,

    // Core response
    output wire [NUM_REQS-1:0]                      core_rsp_valid,
    output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]     core_rsp_data,
    output wire [CORE_TAG_WIDTH-1:0]                core_rsp_tag,
    input  wire                                     core_rsp_ready
);

    `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))

    // One word per line; the cache-config address macros used below are
    // written in terms of CACHE_LINE_SIZE.
    localparam CACHE_LINE_SIZE = WORD_SIZE;

`ifdef DBG_CACHE_REQ_INFO
    /* verilator lint_off UNUSED */
    wire [31:0]         debug_pc_st0;
    wire [`NW_BITS-1:0] debug_wid_st0;
    /* verilator lint_on UNUSED */
`endif

    // ---------------------------------------------------------------------
    // Bank selection: core requests -> per-bank requests (pre-queue)
    // ---------------------------------------------------------------------

    wire [NUM_BANKS-1:0]                        per_bank_core_req_valid_unqual;
    wire [NUM_BANKS-1:0][`REQS_BITS-1:0]        per_bank_core_req_tid_unqual;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_rw_unqual;
    wire [NUM_BANKS-1:0][WORD_SIZE-1:0]         per_bank_core_req_byteen_unqual;
    wire [NUM_BANKS-1:0][`WORD_ADDR_WIDTH-1:0]  per_bank_core_req_addr_unqual;
    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0]       per_bank_core_req_data_unqual;
    wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0]    per_bank_core_req_tag_unqual;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_ready_unqual;

    VX_cache_core_req_bank_sel #(
        .CACHE_LINE_SIZE (WORD_SIZE),
        .NUM_BANKS       (NUM_BANKS),
        .WORD_SIZE       (WORD_SIZE),
        .NUM_REQS        (NUM_REQS),
        .CORE_TAG_WIDTH  (CORE_TAG_WIDTH),
        .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET),
        .BUFFERED        (0)
    ) core_req_bank_sel (
        .clk        (clk),
        .reset      (reset),
    `ifdef PERF_ENABLE
        .bank_stalls(perf_cache_if.bank_stalls),
    `else
        `UNUSED_PIN (bank_stalls),
    `endif
        .core_req_valid (core_req_valid),
        .core_req_rw    (core_req_rw),
        .core_req_byteen(core_req_byteen),
        .core_req_addr  (core_req_addr),
        .core_req_data  (core_req_data),
        .core_req_tag   (core_req_tag),
        .core_req_ready (core_req_ready),
        .per_bank_core_req_valid (per_bank_core_req_valid_unqual),
        .per_bank_core_req_tid   (per_bank_core_req_tid_unqual),
        .per_bank_core_req_rw    (per_bank_core_req_rw_unqual),
        .per_bank_core_req_byteen(per_bank_core_req_byteen_unqual),
        .per_bank_core_req_addr  (per_bank_core_req_addr_unqual),
        .per_bank_core_req_tag   (per_bank_core_req_tag_unqual),
        .per_bank_core_req_data  (per_bank_core_req_data_unqual),
        .per_bank_core_req_ready (per_bank_core_req_ready_unqual)
    );

    `UNUSED_VAR (per_bank_core_req_tag_unqual)
    `UNUSED_VAR (per_bank_core_req_rw_unqual)

    // ---------------------------------------------------------------------
    // Core request queue (per-bank bundle at the queue head)
    // ---------------------------------------------------------------------

    // All per-bank signals are NUM_BANKS wide so that the concatenations below
    // match the queue DATAW (NUM_BANKS * per-bank payload) for any
    // NUM_REQS >= NUM_BANKS, not just NUM_REQS == NUM_BANKS.
    wire [NUM_BANKS-1:0]                        per_bank_core_req_valid;
    wire [NUM_BANKS-1:0][`REQS_BITS-1:0]        per_bank_core_req_tid;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_rw;
    wire [NUM_BANKS-1:0][WORD_SIZE-1:0]         per_bank_core_req_byteen;
    wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr;
    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0]       per_bank_core_req_data;
    wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0]    per_bank_core_req_tag;

    wire creq_push, creq_pop, creq_empty, creq_full;
    wire crsq_full;

    // Accept a bundle when at least one lane has a request and there is room.
    assign creq_push = (| core_req_valid) && !creq_full;
    // Retire the head bundle whenever the response queue can take its result.
    assign creq_pop  = ~creq_empty && ~crsq_full;

    // Back-pressure towards the bank selector: all banks stall together.
    assign per_bank_core_req_ready_unqual = {NUM_BANKS{~creq_full}};

    // Reduce each bank's word address to the line index stored in its RAM.
    wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
    `UNUSED_VAR (per_bank_core_req_addr_unqual)
    for (genvar i = 0; i < NUM_BANKS; i++) begin
        wire [`LINE_ADDR_WIDTH-1:0] tmp = `LINE_SELECT_ADDRX(per_bank_core_req_addr_unqual[i]);
        assign per_bank_core_req_addr_qual[i] = tmp[`LINE_SELECT_BITS-1:0];
        `UNUSED_VAR (tmp)
    end

    VX_fifo_queue #(
        .DATAW   (NUM_BANKS * (1 + `REQS_BITS + 1 + WORD_SIZE + `LINE_SELECT_BITS + `WORD_WIDTH + CORE_TAG_WIDTH)),
        .SIZE    (CREQ_SIZE),
        .FASTRAM (1)
    ) core_req_queue (
        .clk     (clk),
        .reset   (reset),
        .push    (creq_push),
        .pop     (creq_pop),
        .data_in ({per_bank_core_req_valid_unqual,
                   per_bank_core_req_tid_unqual,
                   per_bank_core_req_rw_unqual,
                   per_bank_core_req_byteen_unqual,
                   per_bank_core_req_addr_qual,
                   per_bank_core_req_data_unqual,
                   per_bank_core_req_tag_unqual}),
        .data_out({per_bank_core_req_valid,
                   per_bank_core_req_tid,
                   per_bank_core_req_rw,
                   per_bank_core_req_byteen,
                   per_bank_core_req_addr,
                   per_bank_core_req_data,
                   per_bank_core_req_tag}),
        .empty   (creq_empty),
        .full    (creq_full),
        `UNUSED_PIN (size)
    );

    // ---------------------------------------------------------------------
    // Per-bank data storage
    // ---------------------------------------------------------------------

    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;

    for (genvar i = 0; i < NUM_BANKS; i++) begin
        VX_sp_ram #(
            .DATAW  (`WORD_WIDTH),
            .SIZE   (`LINES_PER_BANK),
            .BYTEENW(WORD_SIZE),
            .RWCHECK(1)
        ) data (
            .clk   (clk),
            .addr  (per_bank_core_req_addr[i]),
            // Commit a write only while the queue head holds a real request
            // and is being retired (creq_pop == ~creq_empty && ~crsq_full),
            // matching the ~creq_empty qualification already applied to
            // crsq_push for reads.
            // NOTE(review): assumes VX_fifo_queue's data_out is meaningful
            // only while !empty - confirm against VX_fifo_queue.
            .wren  (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && creq_pop),
            .byteen(per_bank_core_req_byteen[i]),
            .rden  (1'b1),
            .din   (per_bank_core_req_data[i]),
            .dout  (per_bank_core_rsp_data[i])
        );
    end

    // ---------------------------------------------------------------------
    // Response assembly: scatter bank outputs back to requesting lanes
    // ---------------------------------------------------------------------

    reg [NUM_REQS-1:0]                  core_rsp_valid_unqual;
    reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
    reg [CORE_TAG_WIDTH-1:0]            core_rsp_tag_unqual;

    always @(*) begin
        core_rsp_valid_unqual = 0;
        core_rsp_data_unqual  = 'x;
        core_rsp_tag_unqual   = 'x;
        for (integer i = 0; i < NUM_BANKS; i++) begin
            if (per_bank_core_req_valid[i]) begin
                core_rsp_valid_unqual[per_bank_core_req_tid[i]] = 1;
                core_rsp_data_unqual[per_bank_core_req_tid[i]]  = per_bank_core_rsp_data[i];
                // A single tag is returned per bundle; the highest-indexed
                // valid bank's tag is the one that ends up selected.
                core_rsp_tag_unqual = per_bank_core_req_tag[i];
            end
        end
    end

`ifdef DBG_CACHE_REQ_INFO
    if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
        assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_unqual[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
    end else begin
        assign {debug_pc_st0, debug_wid_st0} = 0;
    end
`endif

    // ---------------------------------------------------------------------
    // Core response queue
    // ---------------------------------------------------------------------

    wire [NUM_REQS-1:0] core_rsp_valid_tmask;
    wire crsq_push, crsq_pop, crsq_empty;

    // The head bundle is treated as a write if any valid bank request is a write.
    wire core_rsp_rw = | (per_bank_core_req_valid & per_bank_core_req_rw);

    // Only read bundles produce a response.
    assign crsq_push = ~creq_empty && ~core_rsp_rw && ~crsq_full;
    assign crsq_pop  = ~crsq_empty && core_rsp_ready;

    VX_fifo_queue #(
        // Payload is one valid bit and one data word per requesting lane
        // (NUM_REQS), plus the shared tag.
        .DATAW    (NUM_REQS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
        .SIZE     (CRSQ_SIZE),
        .BUFFERED (1),
        .FASTRAM  (1)
    ) core_rsp_queue (
        .clk     (clk),
        .reset   (reset),
        .push    (crsq_push),
        .pop     (crsq_pop),
        .data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
        .data_out({core_rsp_valid_tmask,  core_rsp_data,        core_rsp_tag}),
        .empty   (crsq_empty),
        .full    (crsq_full),
        `UNUSED_PIN (size)
    );

    // Mask the lane valids so nothing is presented while the queue is empty.
    assign core_rsp_valid = core_rsp_valid_tmask & {NUM_REQS{~crsq_empty}};

`ifdef DBG_PRINT_CACHE_BANK
    always @(posedge clk) begin
        if (crsq_full) begin
            $display("%t: cache%0d pipeline-stall", $time, CACHE_ID);
        end
        if (creq_pop) begin
            if (core_rsp_rw)
                $display("%t: cache%0d core-wr-req: tmask=%0b, addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, per_bank_core_req_valid, per_bank_core_req_addr, per_bank_core_req_tag, per_bank_core_req_byteen, per_bank_core_req_data, debug_wid_st0, debug_pc_st0);
            else
                $display("%t: cache%0d core-rd-req: tmask=%0b, addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, per_bank_core_req_valid, per_bank_core_req_addr, per_bank_core_req_tag, per_bank_core_req_byteen, per_bank_core_rsp_data, debug_wid_st0, debug_pc_st0);
        end
    end
`endif

`ifdef PERF_ENABLE
    // No hit/miss or pipeline accounting in this module: these counters are
    // tied to zero (bank_stalls is driven by core_req_bank_sel above).
    assign perf_cache_if.reads        = '0;
    assign perf_cache_if.writes       = '0;
    assign perf_cache_if.read_misses  = '0;
    assign perf_cache_if.write_misses = '0;
    assign perf_cache_if.mshr_stalls  = '0;
    assign perf_cache_if.pipe_stalls  = '0;
    assign perf_cache_if.crsp_stalls  = '0;
`endif

endmodule